From 4447800c6aa97b5a63c02bacd3d0cd19f84bb5bc Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Tue, 6 Oct 2020 15:12:05 +0000 Subject: [PATCH 01/26] notify on fail orb --- .circleci/config.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0060192..8f49ef4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,4 +1,7 @@ -version: 2 +version: 2.1 +orbs: + slack: circleci/slack@3.4.2 + jobs: build: docker: @@ -37,6 +40,9 @@ jobs: --password=$SANDBOX_PASSWORD \ --client-id=50 \ tests + - slack/notify-on-failure: + only_for_branches: master + workflows: version: 2 commit: From 067258d731a124c2803dae92deafe8057ff204bc Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Mon, 29 Mar 2021 14:33:33 +0000 Subject: [PATCH 02/26] incremental test changes; set time/datetime expectations to handle dst, add explicit assertions to ensure order of records by replication key, cleanup --- .../test_postgres_incremental_replication.py | 81 +++++++++++-------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/tests/test_postgres_incremental_replication.py b/tests/test_postgres_incremental_replication.py index 80b8871..8555227 100644 --- a/tests/test_postgres_incremental_replication.py +++ b/tests/test_postgres_incremental_replication.py @@ -145,7 +145,7 @@ def setUp(self): }) self.expected_records.append({ 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': '10:09:08', + 'OUR TIME': str(our_time), 'our_text': 'some text 2', 'our_bit': True, 'our_integer': 44101, @@ -155,13 +155,13 @@ def setUp(self): 'our_boolean': True, 'our_jsonb': '{"burgers": "good++"}', 'our_bigint': 1000001, - 'OUR TIME TZ': '10:09:08-04:00', + 'OUR TIME TZ': str(our_time_tz), 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': '1977-03-03T08:03:03.733184+00:00', + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), 'our_smallint': 2, 'OUR DATE': '1964-07-01T00:00:00+00:00', 'our_varchar': 'our_varchar 2', - 'OUR TS': '1977-03-03T03:03:03.733184+00:00', + 'OUR TS': self.expected_ts(our_ts), 'our_uuid': self.inserted_records[0]['our_uuid'], 'our_real': decimal.Decimal('1.2'), 'our_varchar_10': 'varchar_10', @@ -171,7 +171,6 @@ def setUp(self): 'our_mac' : self.inserted_records[0]['our_mac'], 'our_money' : None }) - # record 2 our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) our_ts_tz = nyc_tz.localize(our_ts) @@ -215,10 +214,10 @@ def setUp(self): 'our_boolean': True, 'our_jsonb': self.inserted_records[1]['our_jsonb'], 'our_bigint': 1000000, - 'OUR TS': '1987-02-02T02:02:02.722184+00:00', - 'OUR TS TZ': '1987-02-02T07:02:02.722184+00:00', - 'OUR TIME': '12:11:10', - 'OUR TIME TZ': '12:11:10-04:00', + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), 'our_store': {"name" : "betty", "size" :"small"}, 'our_smallint': 1, 'OUR DATE': '1998-03-04T00:00:00+00:00', @@ -250,7 +249,7 @@ def setUp(self): quote_ident('OUR TIME', cur) : our_time, quote_ident('OUR TIME TZ', cur) : our_time_tz, quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), + 'our_double' : '1.1', 'our_real' : decimal.Decimal('1.2'), 'our_boolean' : True, 'our_bit' : '0', @@ -275,10 +274,10 @@ def setUp(self): 'our_boolean': True, 'our_jsonb': self.inserted_records[1]['our_jsonb'], 'our_bigint': 1000000, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ': '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME': '12:11:10', - 'OUR TIME 
TZ': '12:11:10-04:00', + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), 'our_store': {"name" : "betty", "size" :"small"}, 'our_smallint': 1, 'OUR DATE': '1998-03-04T00:00:00+00:00', @@ -340,6 +339,18 @@ def get_properties(): 'default_replication_method' : 'LOG_BASED' } + def expected_ts_tz(self, our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def expected_ts(self, our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def test_run(self): conn_id = connections.ensure_connection(self) @@ -404,12 +415,16 @@ def test_run(self): self.assertDictEqual(self.expected_records[1], messages[2]['data']) self.assertDictEqual(self.expected_records[2], messages[3]['data']) + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + print("records are correct") # grab bookmarked state state = menagerie.get_state(conn_id) bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] # verify state and bookmarks meet expectations self.assertIsNone(state['currently_syncing']) @@ -485,10 +500,10 @@ def test_run(self): 'our_smallint' : 2, 'our_bigint' : 1000001, 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : '1996-04-04T04:04:04.733184+00:00', - 'OUR TS TZ' : '1996-04-04T08:04:04.733184+00:00', - 'OUR TIME' : '06:06:06', - 'OUR TIME TZ' : '06:06:06-04:00', + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), 'OUR DATE' : '1970-07-01T00:00:00+00:00', 'our_double' : decimal.Decimal('1.1'), 'our_real' : decimal.Decimal('1.2'), @@ -550,10 +565,10 @@ def test_run(self): 'our_boolean': True, 'our_jsonb': self.inserted_records[-1]['our_jsonb'], 'our_bigint': 100000, - 'OUR TS': '2007-01-01T12:12:12.222111+00:00', - 'OUR TS TZ': '2007-01-01T17:12:12.222111+00:00', - 'OUR TIME': '12:11:10', - 'OUR TIME TZ': '12:11:10-04:00', + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), 'our_store': {"name" : "betty", "size" :"small"}, 'our_smallint': 1, 'OUR DATE': '1999-09-09T00:00:00+00:00', @@ -613,10 +628,10 @@ def test_run(self): 'our_boolean': True, 'our_jsonb': self.inserted_records[-1]['our_jsonb'], 'our_bigint': 100000, - 'OUR TS': '2111-01-01T12:12:12.222111+00:00', - 'OUR TS TZ': '2111-01-01T17:12:12.222111+00:00', - 'OUR TIME': '12:11:10', - 'OUR TIME TZ': '12:11:10-04:00', + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), 'our_store': {"name" : "betty", "size" :"small"}, 'our_smallint': 1, 'OUR DATE': '1999-09-09T00:00:00+00:00', @@ -645,7 +660,7 @@ def test_run(self): "our_double": decimal.Decimal("6.6"), "our_money": "$0.00" } - self.expected_records[0]["OUR TS TZ"] = 
'2021-04-04T08:04:04.733184+00:00' + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) self.expected_records[0]["our_double"] = decimal.Decimal("6.6") self.expected_records[0]["our_money"] = "$0.00" db_utils.update_record(cur, canon_table_name, record_pk, updated_data) @@ -660,7 +675,7 @@ def test_run(self): "our_double": decimal.Decimal("6.6"), "our_money": "$0.00" } - self.expected_records[1]["OUR TS TZ"] = '2021-04-04T08:04:04.733184+00:00' + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) self.expected_records[1]["our_double"] = decimal.Decimal("6.6") self.expected_records[1]["our_money"] = "$0.00" db_utils.update_record(cur, canon_table_name, record_pk, updated_data) @@ -706,7 +721,7 @@ def test_run(self): expected_record_id = self.expected_records[3]['id'] self.assertNotIn(expected_record_id, actual_record_ids) - # verify the deleted record with a lower replication-key value was NOT replicated + # verify the deleted record with a lower replication-key value was NOT replicated expected_record_id = self.expected_records[4]['id'] self.assertNotIn(expected_record_id, actual_record_ids) @@ -717,12 +732,15 @@ def test_run(self): # verify the expected inserted record with a higher replication-key value was replicated self.assertDictEqual(self.expected_records[5], messages[3]['data']) + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + print("records are correct") # get bookmarked state state = menagerie.get_state(conn_id) bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] # verify the bookmarked state matches our expectations self.assertIsNone(bookmark.get('lsn')) @@ -779,7 +797,6 @@ def test_run(self): # get bookmarked state state = menagerie.get_state(conn_id) bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] # verify the bookmarked state matches our expectations self.assertIsNone(bookmark.get('lsn')) From e377bf69a6dbec6504a1842c7952ab333f3618c2 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Mon, 29 Mar 2021 16:11:40 +0000 Subject: [PATCH 03/26] upgrade the full table replication test to include various inserts, updates, and deletes --- tests/test_postgres_full_table_replication.py | 882 ++++++++++++------ 1 file changed, 585 insertions(+), 297 deletions(-) diff --git a/tests/test_postgres_full_table_replication.py b/tests/test_postgres_full_table_replication.py index 23f6054..d9d5f0c 100644 --- a/tests/test_postgres_full_table_replication.py +++ b/tests/test_postgres_full_table_replication.py @@ -64,22 +64,6 @@ test_schema_name = "public" test_table_name = "postgres_full_table_replication_test" -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), 
columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - class PostgresFullTable(unittest.TestCase): @@ -92,26 +76,11 @@ def setUp(self): with db_utils.get_test_connection('dev') as conn: conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) create_table_sql = """ CREATE TABLE {} (id SERIAL PRIMARY KEY, @@ -141,11 +110,15 @@ def setUp(self): our_mac macaddr, our_alignment_enum ALIGNMENT, our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur), NUMERIC_PRECISION, NUMERIC_SCALE) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) cur.execute(create_table_sql) - #insert fixture data 1 + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) nyc_tz = pytz.timezone('America/New_York') our_ts_tz = nyc_tz.localize(our_ts) @@ -153,38 +126,68 @@ def setUp(self): our_time_tz = our_time.isoformat() + "-04:00" our_date = datetime.date(1998, 3, 4) my_uuid = str(uuid.uuid1()) - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - } - - insert_record(cur, test_table_name, self.rec_1) - - - #insert fixture data 2 + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 
'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + # 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + # 'OUR TS TZ': '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME': str(our_time), + # 'OUR TIME': '12:11:10', + 'OUR TIME TZ': str(our_time_tz), + # 'OUR TIME TZ': '12:11:10-04:00', + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) nyc_tz = pytz.timezone('America/New_York') our_ts_tz = nyc_tz.localize(our_ts) @@ -192,43 +195,107 @@ def setUp(self): our_time_tz = our_time.isoformat() + "-04:00" our_date = datetime.date(1964, 7, 1) my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - } - - insert_record(cur, test_table_name, self.rec_2) - - self.rec_3 = {'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - } - - insert_record(cur, test_table_name, self.rec_3) - + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 
decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + # 'OUR TIME': '10:09:08', + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + # 'OUR TIME TZ': '10:09:08-04:00', + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + # 'OUR TS TZ': '1987-03-03T08:03:03.733184+00:00', + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + # 'OUR TS': '1987-03-03T03:03:03.733184+00:00', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). 
+ 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) @staticmethod def expected_check_streams(): @@ -273,6 +340,18 @@ def get_properties(): 'itersize' : '10' } + def expected_ts_tz(self, our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def expected_ts(self, our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def test_run(self): conn_id = connections.ensure_connection(self) @@ -283,247 +362,456 @@ def test_run(self): exit_status = menagerie.get_exit_status(conn_id, check_job_name) menagerie.verify_check_exit_status(self, exit_status, check_job_name) - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) # verify that persisted streams have the correct properties test_catalog = found_catalogs[0] - print('Catalog', test_catalog) - self.assertEqual('postgres_full_table_replication_test', test_catalog['stream_name']) - + self.assertEqual(test_table_name, test_catalog['stream_name']) print("discovered streams are correct") - print('checking discoverd metadata for public-postgres_full_table_test...') - md = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])['metadata'] - - self.assertEqual( - {('properties', 'our_varchar'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'character varying'}, - ('properties', 'our_boolean'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'boolean'}, - ('properties', 'our_real'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'real'}, - ('properties', 'our_uuid'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'uuid'}, - ('properties', 'our_bit'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'bit'}, - ('properties', 'OUR TS 
TZ'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'timestamp with time zone'}, - ('properties', 'our_varchar_10'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'character varying'}, - ('properties', 'our_store'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'hstore'}, - ('properties', 'our_citext'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'citext'}, - ('properties', 'OUR TIME'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'time without time zone'}, - ('properties', 'our_decimal'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'numeric'}, - ('properties', 'OUR TS'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'timestamp without time zone'}, - ('properties', 'our_jsonb'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'jsonb'}, - ('properties', 'OUR TIME TZ'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'time with time zone'}, - ('properties', 'our_text'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'text'}, - ('properties', 'OUR DATE'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'date'}, - ('properties', 'our_double'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'double precision'}, - (): {'is-view': False, 'schema-name': 'public', 'table-key-properties': ['id'], 'database-name': 'dev', 'row-count': 0}, - ('properties', 'our_bigint'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'bigint'}, - ('properties', 'id'): {'inclusion': 'automatic', 'selected-by-default': True, 'sql-datatype': 'integer'}, - ('properties', 'our_json'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'json'}, - ('properties', 'our_smallint'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'smallint'}, - ('properties', 'our_integer'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'integer'}, - ('properties', 'our_inet'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'inet'}, - ('properties', 'our_cidr'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'cidr'}, - ('properties', 'our_mac'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'macaddr'}, - ('properties', 'our_alignment_enum'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'alignment'}, - ('properties', 'our_money'): {'inclusion': 'available', 'selected-by-default': True, 'sql-datatype': 'money'}}, - metadata.to_map(md)) - + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) # clear state menagerie.set_state(conn_id, {}) + # run sync job 1 and verify exit codes sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes exit_status = menagerie.get_exit_status(conn_id, sync_job_name) 
menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) - self.assertEqual(record_count_by_stream, { 'postgres_full_table_replication_test': 3}) - records_by_stream = runner.get_records_from_target_output() + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) - table_version = records_by_stream['postgres_full_table_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_full_table_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_full_table_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_full_table_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_full_table_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_full_table_replication_test']['messages'][4]['action'], - 'activate_version') - - # verifications about individual records - for table_name, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[table_name], - msg="Persisted schema did not match expected schema for table `{}`.".format(table_name)) - - expected_record_1 = {'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ': '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME': '12:11:10', - 'OUR TIME TZ': '12:11:10-04:00', - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.rec_1['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.rec_1['our_citext'], - 'our_inet' : self.rec_1['our_inet'], - 'our_cidr' : self.rec_1['our_cidr'], - 'our_mac' : self.rec_1['our_mac'], - 'our_alignment_enum' : self.rec_1['our_alignment_enum'], - 'our_money' : '$100.11' - } + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - expected_record_2 = {'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': '10:09:08', - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 
'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': '10:09:08-04:00', - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': '1987-03-03T08:03:03.733184+00:00', - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': '1987-03-03T03:03:03.733184+00:00', - 'our_uuid': self.rec_2['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.rec_2['our_citext'], - 'our_inet' : self.rec_2['our_inet'], - 'our_cidr' : self.rec_2['our_cidr'], - 'our_mac' : self.rec_2['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - } + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) - actual_record_1 = records_by_stream['postgres_full_table_replication_test']['messages'][1] - self.assertEqual(set(actual_record_1['data'].keys()), set(expected_record_1.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_record_1.keys())))) + print("records are correct") - for k,v in actual_record_1['data'].items(): - self.assertEqual(actual_record_1['data'][k], expected_record_1[k], msg="{} != {} for key {}".format(actual_record_1['data'][k], expected_record_1[k], k)) + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - actual_record_2 = records_by_stream['postgres_full_table_replication_test']['messages'][2] - self.assertEqual(set(actual_record_2['data'].keys()), set(expected_record_2.keys()), - msg="keys for expected_record_2 are wrong: {}".format(set(actual_record_2.keys()).symmetric_difference(set(expected_record_2.keys())))) + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) - for k,v in actual_record_2['data'].items(): - self.assertEqual(actual_record_2['data'][k], expected_record_2[k], msg="{} != {} for key {}".format(actual_record_2['data'][k], expected_record_2[k], k)) + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- - #We cast NaN's, +Inf, -Inf to NULL as wal2json does not support them and now we are at least consistent(ly wrong) - expected_record_3 = {'our_decimal' : None, - 'our_double' : None, - 'our_real' : None} - actual_record_3 = records_by_stream['postgres_full_table_replication_test']['messages'][3] - for k,v in expected_record_3.items(): - self.assertEqual(actual_record_3['data'][k], v, msg="{} != {} for key {}".format(actual_record_3['data'][k], v, k)) + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = 
runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] - print("records are correct") + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) - # verify state and bookmarks - state = menagerie.get_state(conn_id) + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + # grab bookmarked state + state = menagerie.get_state(conn_id) bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream ROOT-CHICKEN to have NO lsn because we are using full-table replication") - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records + # invoke the sync job AGAIN following various manipulations to the data #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
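                # NB | The db_utils.insert_record helper used below is not shown in this
                #      diff. It presumably mirrors the inline helper this patch removes
                #      from the top of the file; a reference sketch of that removed logic:
                def _insert_record_sketch(cursor, table_name, data):
                    our_keys = sorted(data.keys())
                    our_values = [data.get(key) for key in our_keys]
                    columns_sql = ", ".join(our_keys)
                    value_sql = ", ".join(["%s"] * len(our_keys))
                    insert_sql = "INSERT INTO {} ( {} ) VALUES ( {} )".format(
                        quote_ident(table_name, cursor), columns_sql, value_sql)
                    cursor.execute(insert_sql, our_values)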
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
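                # NB | db_utils.update_record is also not shown in this diff. Based only on
                #      the call signature used below, a hypothetical sketch (the real helper
                #      may differ) could look like:
                def _update_record_sketch(cursor, canon_table_name, record_pk, data):
                    set_sql = ", ".join("{} = %s".format(quote_ident(key, cursor))
                                        for key in data.keys())
                    update_sql = "UPDATE {} SET {} WHERE id = %s".format(canon_table_name, set_sql)
                    cursor.execute(update_sql, list(data.values()) + [record_pk])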
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after vairous manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) exit_status = menagerie.get_exit_status(conn_id, sync_job_name) menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_full_table_replication_test': 3}) + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) - new_table_version = records_by_stream['postgres_full_table_replication_test']['table_version'] + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) - self.assertEqual(records_by_stream['postgres_full_table_replication_test']['messages'][0]['action'], - 'upsert') + # verify the new table version increased on the second sync + self.assertGreater(table_version_3, table_version_2) - self.assertEqual(records_by_stream['postgres_full_table_replication_test']['messages'][1]['action'], - 'upsert') + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - 
self.assertEqual(records_by_stream['postgres_full_table_replication_test']['messages'][2]['action'], - 'upsert') - self.assertEqual(records_by_stream['postgres_full_table_replication_test']['messages'][3]['action'], - 'activate_version') + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: - new_table_version = records_by_stream['postgres_full_table_replication_test']['table_version'] + # Prior to Sync 1 + # insert 0, 1, 2 - self.assertGreater(new_table_version, table_version, - msg="table version {} didn't increate from {} on the second run".format(new_table_version, table_version)) + # Prior to Sync 2 + # No db changes - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) SCENARIOS.add(PostgresFullTable) From d39d4e7f3d3dd32cbbf8e89c290fda92de09fde7 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Mon, 29 Mar 2021 16:47:40 +0000 Subject: [PATCH 04/26] pylint fixes --- tests/test_postgres_full_table_replication.py | 7 ++++--- tests/test_postgres_incremental_replication.py | 6 ++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/test_postgres_full_table_replication.py b/tests/test_postgres_full_table_replication.py index d9d5f0c..a0854ca 100644 --- a/tests/test_postgres_full_table_replication.py +++ b/tests/test_postgres_full_table_replication.py @@ -8,7 +8,6 @@ import psycopg2.extras from psycopg2.extensions import quote_ident import pytz -from singer import metadata from tap_tester.scenario import (SCENARIOS) import tap_tester.connections as connections import tap_tester.menagerie as menagerie @@ -340,13 +339,15 @@ def get_properties(): 'itersize' : '10' } - def expected_ts_tz(self, our_ts_tz): + @staticmethod + def expected_ts_tz(our_ts_tz): our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") return expected_value - def expected_ts(self, our_ts): + @staticmethod + def expected_ts(our_ts): expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") return expected_value diff --git a/tests/test_postgres_incremental_replication.py b/tests/test_postgres_incremental_replication.py index 8555227..369c707 100644 --- a/tests/test_postgres_incremental_replication.py +++ 
b/tests/test_postgres_incremental_replication.py @@ -339,13 +339,15 @@ def get_properties(): 'default_replication_method' : 'LOG_BASED' } - def expected_ts_tz(self, our_ts_tz): + @staticmethod + def expected_ts_tz(our_ts_tz): our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") return expected_value - def expected_ts(self, our_ts): + @staticmethod + def expected_ts(our_ts): expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") return expected_value From 581a0f91462549c1c7f4e54bc20b3039197f3338 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Mon, 29 Mar 2021 16:52:08 +0000 Subject: [PATCH 05/26] cleanup unnecessary comments --- tests/test_postgres_full_table_replication.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/test_postgres_full_table_replication.py b/tests/test_postgres_full_table_replication.py index a0854ca..7698f7f 100644 --- a/tests/test_postgres_full_table_replication.py +++ b/tests/test_postgres_full_table_replication.py @@ -165,13 +165,9 @@ def setUp(self): 'our_jsonb': '{"burgers": "good"}', 'our_bigint': 1000000, 'OUR TS': self.expected_ts(our_ts), - # 'OUR TS': '1997-02-02T02:02:02.722184+00:00', 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - # 'OUR TS TZ': '1997-02-02T07:02:02.722184+00:00', 'OUR TIME': str(our_time), - # 'OUR TIME': '12:11:10', 'OUR TIME TZ': str(our_time_tz), - # 'OUR TIME TZ': '12:11:10-04:00', 'our_store': {"name" : "betty", "size" :"small"}, 'our_smallint': 1, 'OUR DATE': '1998-03-04T00:00:00+00:00', @@ -224,7 +220,6 @@ def setUp(self): self.expected_records.append({ 'our_decimal': decimal.Decimal('.02'), 'OUR TIME': str(our_time), - # 'OUR TIME': '10:09:08', 'our_text': 'some text 2', 'our_bit': True, 'our_integer': 44101, @@ -235,15 +230,12 @@ def setUp(self): 'our_jsonb': '{"burgers": "good++"}', 'our_bigint': 1000001, 'OUR TIME TZ': str(our_time_tz), - # 'OUR TIME TZ': '10:09:08-04:00', 'our_store': {"name" : "betty", "dances" :"floor"}, 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - # 'OUR TS TZ': '1987-03-03T08:03:03.733184+00:00', 'our_smallint': 2, 'OUR DATE': '1964-07-01T00:00:00+00:00', 'our_varchar': 'our_varchar 2', 'OUR TS': self.expected_ts(our_ts), - # 'OUR TS': '1987-03-03T03:03:03.733184+00:00', 'our_uuid': self.inserted_records[1]['our_uuid'], 'our_real': decimal.Decimal('1.2'), 'our_varchar_10': 'varchar_10', From 79538430fac1a621f8d3aa0fca7765e697c4eae0 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Tue, 30 Mar 2021 11:48:02 +0000 Subject: [PATCH 06/26] Note known bug in discovery, add auto fields test --- tests/test_postgres_automatic_fields.py | 478 ++++++++++++++++++++++++ tests/test_postgres_discovery.py | 4 + 2 files changed, 482 insertions(+) create mode 100644 tests/test_postgres_automatic_fields.py diff --git a/tests/test_postgres_automatic_fields.py b/tests/test_postgres_automatic_fields.py new file mode 100644 index 0000000..a153cce --- /dev/null +++ b/tests/test_postgres_automatic_fields.py @@ -0,0 +1,478 @@ +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + 
{'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" + + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + 
our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 
'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). 
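+            # (Strict JSON has no literal for NaN or +/-Infinity, e.g.
+            # json.dumps(float('nan'), allow_nan=False) raises ValueError,
+            # so there is no faithful way to emit these values downstream.)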
+ 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'OUR TS TZ'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def select_streams_and_fields(conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication 
method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(log_based_conn_id) + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify messages match expectations specific to full table replication + if self.default_replication_method is self.FULL_TABLE: + self.assertEqual('activate_version', messages[4]['action']) + self.assertEqual(5, len(messages)) + else: + self.assertEqual(4, len(messages)) + + # Verify that you get some records for each stream + 
self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) diff --git a/tests/test_postgres_discovery.py b/tests/test_postgres_discovery.py index b5f123d..fa6cb93 100644 --- a/tests/test_postgres_discovery.py +++ b/tests/test_postgres_discovery.py @@ -416,6 +416,10 @@ def discovery_test(self, conn_id): expected_replication_keys, actual_replication_keys ) + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. + # Verify primary key(s) match expectations self.assertSetEqual( expected_primary_keys, actual_primary_keys, From 0d0de66829f61b64d9205ba94d4d7e2d4e03db7b Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Tue, 30 Mar 2021 15:08:32 +0000 Subject: [PATCH 07/26] fix existing issue in logical_replication_multiple_dbs_test --- ...res_logical_replication_multiple_tables.py | 35 +++++++++++++------ 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/tests/test_postgres_logical_replication_multiple_tables.py b/tests/test_postgres_logical_replication_multiple_tables.py index 36c716a..da30672 100644 --- a/tests/test_postgres_logical_replication_multiple_tables.py +++ b/tests/test_postgres_logical_replication_multiple_tables.py @@ -51,8 +51,8 @@ class PostgresLogicalRepMultipleTables(unittest.TestCase): def tearDown(self): with db_utils.get_test_connection('dev') as conn: conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) def setUp(self): db_utils.ensure_environment_variables_set() @@ -245,19 +245,32 @@ def test_run(self): with db_utils.get_test_connection('dev') as conn: conn.autocommit = True with conn.cursor() as cur: - #insert another cow + # insert another cow self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None - #insert another chicken + # insert another chicken self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None sync_job_name = runner.run_sync_mode(self, conn_id) @@ -271,14 +284,14 @@ def test_run(self): self.expected_sync_streams(), self.expected_pks()) self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - upserts = runner.get_upserts_from_target_output() - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}, - {'_sdc_deleted_at': None, 'cow_age': 10, 'id': 3, 'cow_name': 'cindy cow'}, - {'chicken_name': 'carl chicken', '_sdc_deleted_at': None, 'chicken_age': 4, 'id': 3}], - upserts) + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) print("inserted record is correct") From c51233594c77b2b66177857705d04dbe0e2855d2 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Tue, 30 Mar 2021 15:10:30 +0000 Subject: [PATCH 08/26] parametrize automatic fields test for different replication methods --- tests/test_postgres_automatic_fields.py | 101 ++++++++++-------------- 1 file changed, 42 insertions(+), 59 deletions(-) diff --git a/tests/test_postgres_automatic_fields.py b/tests/test_postgres_automatic_fields.py index a153cce..e147069 100644 --- a/tests/test_postgres_automatic_fields.py +++ b/tests/test_postgres_automatic_fields.py @@ -62,7 +62,7 @@ test_schema_name = "public" test_table_name = "postgres_automatic_fields_test" - +test_db = "dev" class PostgresAutomaticFields(unittest.TestCase): INCREMENTAL = "INCREMENTAL" @@ -71,6 +71,12 @@ class PostgresAutomaticFields(unittest.TestCase): default_replication_method = "" + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + def setUp(self): db_utils.ensure_environment_variables_set() @@ -134,7 +140,7 @@ def setUp(self): 'our_varchar' : "our_varchar", 'our_varchar_10' : "varchar_10", 'our_text' : "some text", - 'our_integer' : 44100, + 'our_integer' : 19972, 'our_smallint' : 1, 'our_bigint' : 1000000, 'our_decimal' : decimal.Decimal('.01'), @@ -162,7 +168,7 @@ def setUp(self): 'our_decimal': decimal.Decimal('.01'), 'our_text': 'some text', 'our_bit': False, - 'our_integer': 44100, + 'our_integer': 19972, 'our_double': decimal.Decimal('1.1'), 'id': 1, 'our_json': '{"secret": 55}', @@ -199,7 +205,7 @@ def setUp(self): 'our_varchar' : "our_varchar 2", 'our_varchar_10' : "varchar_10", 'our_text' : "some text 2", - 'our_integer' : 44101, + 'our_integer' : 19873, 'our_smallint' : 2, 'our_bigint' : 1000001, 
'our_decimal' : decimal.Decimal('.02'), @@ -227,7 +233,7 @@ def setUp(self): 'OUR TIME': str(our_time), 'our_text': 'some text 2', 'our_bit': True, - 'our_integer': 44101, + 'our_integer': 19873, 'our_double': decimal.Decimal('1.1'), 'id': 2, 'our_json': '["nymn 77"]', @@ -251,44 +257,6 @@ def setUp(self): 'our_alignment_enum' : None, 'our_money': None }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) for record in self.inserted_records: db_utils.insert_record(cur, test_table_name, record) @@ -309,7 +277,7 @@ def expected_primary_keys(): def expected_replication_keys(self): replication_keys = { - 'postgres_automatic_fields_test' : {'OUR TS TZ'} + 'postgres_automatic_fields_test' : {'our_integer'} } if self.default_replication_method == self.INCREMENTAL: @@ -370,12 +338,27 @@ def expected_ts(our_ts): return expected_value - @staticmethod - def select_streams_and_fields(conn_id, catalog, select_all_fields: bool = False): + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): """Select all streams and all fields within streams or all streams and no fields.""" schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] non_selected_properties = [] if not select_all_fields: @@ -391,16 +374,22 @@ def test_run(self): """Parametrized automatic fields test running against each replication method.""" self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) + full_table_conn_id = connections.ensure_connection(self) self.automatic_fields_test(full_table_conn_id) + # BUG? 
| We can't run a sync because replication-key isn't automatic # self.default_replication_method = self.INCREMENTAL # incremental_conn_id = connections.ensure_connection(self, original_properties=False) # self.automatic_fields_test(incremental_conn_id) - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(log_based_conn_id) + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + def automatic_fields_test(self, conn_id): """Just testing we can sync with no fields selected. And that automatic fields still get synced.""" @@ -455,17 +444,11 @@ def automatic_fields_test(self, conn_id): record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) self.assertEqual('activate_version', messages[0]['action']) self.assertEqual('upsert', messages[1]['action']) self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify messages match expectations specific to full table replication - if self.default_replication_method is self.FULL_TABLE: - self.assertEqual('activate_version', messages[4]['action']) - self.assertEqual(5, len(messages)) - else: - self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[3]['action']) # Verify that you get some records for each stream self.assertGreater(record_count_by_stream[test_table_name], 0) From d9457044a4d54952b1ab930ed9c0034efebd575a Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Tue, 30 Mar 2021 15:51:15 +0000 Subject: [PATCH 09/26] pylint fixes for auto fields test --- tests/test_postgres_automatic_fields.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/test_postgres_automatic_fields.py b/tests/test_postgres_automatic_fields.py index e147069..cdaf035 100644 --- a/tests/test_postgres_automatic_fields.py +++ b/tests/test_postgres_automatic_fields.py @@ -281,14 +281,10 @@ def expected_replication_keys(self): } if self.default_replication_method == self.INCREMENTAL: - return replication_keys + return replication_keys else: return {'postgres_automatic_fields_test' : set()} - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - @staticmethod def tap_name(): return "tap-postgres" @@ -432,7 +428,6 @@ def automatic_fields_test(self, conn_id): self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() ) records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] messages = records_by_stream[test_table_name]['messages'] # expected values From b365d85171911dc5d9f58d7f538ee0d35bd399e7 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Tue, 30 Mar 2021 15:57:15 +0000 Subject: [PATCH 10/26] address knonw bug with incremental --- tests/test_postgres_automatic_fields.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_postgres_automatic_fields.py b/tests/test_postgres_automatic_fields.py index cdaf035..014225b 100644 --- 
a/tests/test_postgres_automatic_fields.py +++ b/tests/test_postgres_automatic_fields.py @@ -369,15 +369,22 @@ def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = def test_run(self): """Parametrized automatic fields test running against each replication method.""" + # Test running a sync with no fields selected using full-table replication self.default_replication_method = self.FULL_TABLE full_table_conn_id = connections.ensure_connection(self) self.automatic_fields_test(full_table_conn_id) - # BUG? | We can't run a sync because replication-key isn't automatic + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication # self.default_replication_method = self.INCREMENTAL # incremental_conn_id = connections.ensure_connection(self, original_properties=False) # self.automatic_fields_test(incremental_conn_id) + # Test running a sync with no fields selected using logical replication self.default_replication_method = self.LOG_BASED with db_utils.get_test_connection('dev') as conn: conn.autocommit = True From 385988c1887d5f107884a9420d038bdf5159a58a Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Fri, 2 Apr 2021 00:53:42 +0000 Subject: [PATCH 11/26] discovery and db_utils cleanup --- tests/db_utils.py | 11 ++++++++--- tests/test_postgres_discovery.py | 13 +++++-------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/db_utils.py b/tests/db_utils.py index ff3339d..c4827a9 100644 --- a/tests/db_utils.py +++ b/tests/db_utils.py @@ -72,9 +72,10 @@ def ensure_fresh_table(conn, conn_cursor, schema_name, table_name): conn_cursor2.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) if conn_cursor2.fetchone()[0] is None: conn_cursor2.execute(""" CREATE EXTENSION hstore; """) - conn_cursor2.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - conn_cursor2.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - conn_cursor2.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + conn_cursor2.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + conn_cursor2.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + conn_cursor2.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'GOOD', 'bad', 'ugly', 'u g l y') """) return conn_cursor2 @@ -121,3 +122,7 @@ def delete_record(conn_cursor, ctable_name, primary_key): # cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) conn_cursor.execute("DELETE FROM {} WHERE id = {}".format(ctable_name, primary_key)) + +def set_db_time_zone(conn_cursor, tz: str = "GMT"): + # set time zone 'America/New_York'; + conn_cursor.execute("SET TIME ZONE '{}'".format(tz)) diff --git a/tests/test_postgres_discovery.py b/tests/test_postgres_discovery.py index fa6cb93..ab825f1 100644 --- a/tests/test_postgres_discovery.py +++ b/tests/test_postgres_discovery.py @@ -346,11 +346,7 @@ def discovery_test(self, conn_id): - Verify schema and db match expectations for a given stream. - Verify schema types match expectations for a given stream. 
""" - ########################################################################## - ### TODO - ### [] Generate multiple tables (streams) and maybe dbs too? - ### [] Investigate potential bug, see DOCS_BUG_1 - ########################################################################## + # TODO Generate multiple tables (streams) and maybe dbs too? # run discovery (check mode) check_job_name = runner.run_check_mode(self, conn_id) @@ -433,9 +429,10 @@ def discovery_test(self, conn_id): self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - # DOCS_BUG_1 ? | The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. KNOWN_MISSING = { 'invalid_bigserial', # BIGSERIAL -> bigint 'invalid_serial', # SERIAL -> integer From cdc8638d0b2d510e16541f75d139bf280433fe9c Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Fri, 2 Apr 2021 00:54:16 +0000 Subject: [PATCH 12/26] datatypes test --- tests/datatype_file_reader.py | 32 + tests/test_postgres_datatypes.py | 788 + tests/text_datatype.txt | 224384 ++++++++++++++++++++++++++++ 3 files changed, 225204 insertions(+) create mode 100644 tests/datatype_file_reader.py create mode 100644 tests/test_postgres_datatypes.py create mode 100644 tests/text_datatype.txt diff --git a/tests/datatype_file_reader.py b/tests/datatype_file_reader.py new file mode 100644 index 0000000..bc1f460 --- /dev/null +++ b/tests/datatype_file_reader.py @@ -0,0 +1,32 @@ +import os + +potential_paths = [ + 'tests/', + '../tests/' + 'tap-postgres/tests/', + '../tap-postgres/tests/', +] + +datatype_to_file = { + "text": "text_datatype.txt", +} + +def _go_to_tests_directory(): + for path in potential_paths: + if os.path.exists(path): + os.chdir(path) + return os.getcwd() + raise NotImplementedError("This reader cannot run from {}".format(os.getcwd())) + + +def read_in(datatype: str = "text"): + print("Acquiring path to tests directory.") + cwd = _go_to_tests_directory() + + filename = datatype_to_file[datatype] + + print("Reading contents of {}.".format(filename)) + with open(cwd + "/" + filename, "r") as data: + contents = data.read() + + return contents diff --git a/tests/test_postgres_datatypes.py b/tests/test_postgres_datatypes.py new file mode 100644 index 0000000..45d7660 --- /dev/null +++ b/tests/test_postgres_datatypes.py @@ -0,0 +1,788 @@ +import os +import datetime +import copy +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + +expected_schema = {'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'OUR TS': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format': 'date-time', 'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 
'minimum': -2147483648, 'type': ['integer']}, + 'unsupported_bit': {}, + 'unsupported_bit_varying': {}, + 'unsupported_box': {}, + 'unsupported_bytea': {}, + 'unsupported_circle': {}, + 'unsupported_interval': {}, + 'unsupported_line': {}, + 'unsupported_lseg': {}, + 'unsupported_path': {}, + 'unsupported_pg_lsn': {}, + 'unsupported_point': {}, + 'unsupported_polygon': {}, + 'unsupported_tsquery': {}, + 'unsupported_tsvector': {}, + 'unsupported_txid_snapshot': {}, + 'unsupported_xml': {}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, + 'minimum': -9223372036854775808, + 'type': ['null', 'integer']}, + 'our_bigserial': {'maximum': 9223372036854775807, + 'minimum': -9223372036854775808, + 'type': ['null', 'integer']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_boolean': {'type': ['null', 'boolean']}, + 'our_char': {'maxLength': 1, 'type': ['null', 'string']}, + 'our_char_big': {'maxLength': 10485760, 'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_decimal': {'exclusiveMaximum': True, + 'exclusiveMinimum': True, + 'maximum': 100000000000000000000000000000000000000000000000000000000000000, + 'minimum': -100000000000000000000000000000000000000000000000000000000000000, + 'multipleOf': "Decimal('1E-38')", + 'type': ['null', 'number']}, + 'our_double': {'type': ['null', 'number']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, + 'minimum': -2147483648, + 'type': ['null', 'integer']}, + 'our_json': {'type': ['null', 'string']}, # TODO Should this have a format?? + 'our_jsonb': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}, + 'our_nospec_decimal': {'exclusiveMaximum': True, + 'exclusiveMinimum': True, + 'maximum': 100000000000000000000000000000000000000000000000000000000000000, + 'minimum': -100000000000000000000000000000000000000000000000000000000000000, + 'multipleOf': "Decimal('1E-38')", + 'type': ['null', 'number']}, + 'our_nospec_numeric': {'exclusiveMaximum': True, + 'exclusiveMinimum': True, + 'maximum': 100000000000000000000000000000000000000000000000000000000000000, + 'minimum': -100000000000000000000000000000000000000000000000000000000000000, + 'multipleOf': "Decimal('1E-38')", + 'type': ['null', 'number']}, + 'our_numeric': {'exclusiveMaximum': True, + 'exclusiveMinimum': True, + 'maximum': 100000000000000000000000000000000000000000000000000000000000000, + 'minimum': -100000000000000000000000000000000000000000000000000000000000000, + 'multipleOf': "Decimal('1E-38')", + 'type': ['null', 'number']}, + 'our_real': {'type': ['null', 'number']}, + 'our_serial': {'maximum': 2147483647, + 'minimum': -2147483648, + 'type': ['null', 'integer']}, + 'our_smallint': {'maximum': 32767, + 'minimum': -32768, + 'type': ['null', 'integer']}, + 'our_smallserial': {'maximum': 32767, + 'minimum': -32768, + 'type': ['null', 'integer']}, + 'our_hstore': {'properties': {}, 'type': ['null', 'object']}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_varchar_big': {'maxLength': 10485760, 'type': ['null', 'string']}} + + +decimal.getcontext().prec = 131072 + 16383 + +whitespace = ' \t\n\r\v\f' +ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz' +ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +ascii_letters = ascii_lowercase + 
ascii_uppercase +digits = '0123456789' +punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" +our_ascii = ascii_letters + digits + punctuation + whitespace + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + - Cover the following character sets: + LATIN1 ISO 8859-1, ECMA 94 Western European Yes 1 ISO88591 + LATIN2 ISO 8859-2, ECMA 94 Central European Yes 1 ISO88592 + LATIN3 ISO 8859-3, ECMA 94 South European Yes 1 ISO88593 + LATIN4 ISO 8859-4, ECMA 94 North European Yes 1 ISO88594 + LATIN5 ISO 8859-9, ECMA 128 Turkish Yes 1 ISO88599 + LATIN6 ISO 8859-10, ECMA 144 Nordic Yes 1 ISO885910 + LATIN7 ISO 8859-13 Baltic Yes 1 ISO885913 + LATIN8 ISO 8859-14 Celtic Yes 1 ISO885914 + LATIN9 ISO 8859-15 LATIN1 with Euro and accents Yes 1 ISO885915 + LATIN10 ISO 8859-16, ASRO SR 14111 Romanian Yes 1 ISO885916 + UTF8 Unicode, 8-bit all Yes 1-4 Unicode + + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + - Test all precisions 0..6 + + UUID + TODOs + - uuid.uuid1(node=None, clock_seq=None) + Generate a UUID from a host ID, sequence number, and the current time. If node is not given, getnode() is used to obtain the hardware address. If clock_seq is given, it is used as the sequence number; otherwise a random 14-bit sequence number is chosen. + + - uuid.uuid3(namespace, name) + Generate a UUID based on the MD5 hash of a namespace identifier (which is a UUID) and a name (which is a string). + + - uuid.uuid4() + Generate a random UUID. + + - uuid.uuid5(namespace, name) + Generate a UUID based on the SHA-1 hash of a namespace identifier (which is a UUID) and a name (which is a string). 
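+
+    As a minimal sketch, one candidate fixture value per generator could be
+    built as follows (the namespace and name arguments are arbitrary
+    illustrative choices, not values this test currently inserts):
+
+        import uuid
+        v1 = uuid.uuid1()                                   # host ID + timestamp
+        v3 = uuid.uuid3(uuid.NAMESPACE_DNS, 'example.com')  # MD5 of namespace+name
+        v4 = uuid.uuid4()                                   # random
+        v5 = uuid.uuid5(uuid.NAMESPACE_DNS, 'example.com')  # SHA-1 of namespace+name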
+ + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + db_utils.set_db_time_zone(cur, '+15:59') #'America/New_York') + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_nospec_numeric NUMERIC, + our_numeric NUMERIC(1000, 500), + our_nospec_decimal DECIMAL, + our_decimal DECIMAL(1000, 500), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_hstore HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + unsupported_bit BIT(80), + unsupported_bit_varying BIT VARYING(80), + unsupported_box BOX, + unsupported_bytea BYTEA, + unsupported_circle CIRCLE, + unsupported_interval INTERVAL, + unsupported_line LINE, + unsupported_lseg LSEG, + unsupported_path PATH, + unsupported_pg_lsn PG_LSN, + unsupported_point POINT, + unsupported_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + unsupported_tsquery TSQUERY, + unsupported_tsvector TSVECTOR, + unsupported_txid_snapshot TXID_SNAPSHOT, + unsupported_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records by test cases + self.inserted_records = [] + self.expected_records = dict() + + + # insert a record wtih minimum values + our_tz = pytz.timezone('Singapore') # GMT+8 + min_date = datetime.date(1, 1, 1) + my_absurdly_small_decimal = decimal.Decimal('-' + '9' * 38 + '.' + '9' * 38) # THIS IS OUR LIMIT IN THE TARGET} + # TODO | BUG ? | The target blows up with greater than 38 digits before/after the decimal. + # Is this a known/expected behavior or a BUG in the target? 
+ # It prevents us from testing what the tap claims to be able to support (100 precision, 38 scale) without rounding AND.. + # The postgres limits WITH rounding. + # my_absurdly_small_decimal = decimal.Decimal('-' + '9' * 38 + '.' + '9' * 38) # THIS IS OUR LIMIT IN THE TARGET + # my_absurdly_small_decimal = decimal.Decimal('-' + '9' * 62 + '.' + '9' * 37) # 131072 + 16383 + # my_absurdly_small_spec_decimal = decimal.Decimal('-' + '9'*500 + '.' + '9'*500) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': "", # TEXT + 'our_text_2': "", # TEXT, TODO move our_ascii into it's own record + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_nospec_numeric': my_absurdly_small_decimal, # NUMERIC, + 'our_numeric': my_absurdly_small_decimal, # NUMERIC(1000, 500), + 'our_nospec_decimal': my_absurdly_small_decimal, # DECIMAL, + 'our_decimal': my_absurdly_small_decimal, # DECIMAL(1000, 500), + quote_ident('OUR TS', cur): '0001-01-01T00:00:00.000001', # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): '0001-01-01T00:00:00.000001-15:59',#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00.000001', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00.000001-15:59', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': -1.79769313486231e+308, # DOUBLE PRECISION + 'our_real': decimal.Decimal('-3.40282e+38'), # REAL, + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': json.dumps(dict()), # JSON, + 'our_jsonb': json.dumps(dict()), # JSONB, + 'our_uuid': '00000000-0000-0000-0000-000000000000', # str(uuid.uuid1()) + 'our_hstore': None, # HSTORE, + 'our_citext': "", # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + }) + self.expected_records['minimum_boundary_general'] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records['minimum_boundary_general'].update({ + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_double': decimal.Decimal('-1.79769313486231e+308'), + 'OUR TS': '0001-01-01T00:00:00.000001+00:00', + 'OUR TS TZ': '0001-01-01T15:59:00.000001+00:00', + 'OUR TIME': '00:00:00.000001', + 'OUR TIME TZ': '00:00:00.000001-15:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_bit': False, + 'our_jsonb': json.loads(self.inserted_records[-1]['our_jsonb']), + 'our_inet': '12.244.233.165', + }) + my_keys = set(self.expected_records['minimum_boundary_general'].keys()) + for key in my_keys: + if key.startswith('"'): + del self.expected_records['minimum_boundary_general'][key] + + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # insert a record wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # 
our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + base_string = "Bread Sticks From Olive Garden" + my_absurdly_large_decimal = decimal.Decimal('9' * 38 + '.' + '9' * 38) # THIS IS OUR LIMIT IN THE TARGET} + # 🥖 = 1f956 + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': "🥖", # CHAR, + 'our_varchar': "a", #* 20971520, # VARCHAR, + 'our_varchar_big': "🥖" + base_string, # VARCHAR(10485714), + 'our_char_big': "🥖", # CHAR(10485760), + 'our_text': "apples", #dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_nospec_numeric': my_absurdly_large_decimal, # NUMERIC, + 'our_numeric': my_absurdly_large_decimal, # NUMERIC(1000, 500), + 'our_nospec_decimal': my_absurdly_large_decimal, # DECIMAL, + 'our_decimal': my_absurdly_large_decimal, # NUMERIC(1000, 500), + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): '9999-12-31T08:00:59.999999-15:59', #max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? + quote_ident('OUR TIME TZ', cur): '23:59:59.999999+1559', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': decimal.Decimal('9.99999999999999'), # '1E308', # DOUBLE PRECISION, + 'our_real': decimal.Decimal('9.99999'), # '1E308', # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': json.dumps({ + 'our_json_string': 'This is our JSON string type.', + 'our_json_number': 666, + 'our_json_object': { + 'our_json_string': 'This is our JSON string type.', + 'our_json_number': 666, + 'our_json_object': {'calm': 'down'}, + 'our_json_array': ['our_json_arrary_string', 6, {'calm': 'down'}, False, None], + 'our_json_boolean': True, + 'our_json_null': None, + }, + 'our_json_array': ['our_json_arrary_string', 6, {'calm': 'down'}, False, None], + 'our_json_boolean': True, + 'our_json_null': None, + }), # JSON, + 'our_jsonb': json.dumps({ + 'our_jsonb_string': 'This is our JSONB string type.', + 'our_jsonb_number': 666, + 'our_jsonb_object': { + 'our_jsonb_string': 'This is our JSONB string type.', + 'our_jsonb_number': 666, + 'our_jsonb_object': {'calm': 'down'}, + 'our_jsonb_array': ['our_jsonb_arrary_string', 6, {'calm': 'down'}, False, None], + 'our_jsonb_boolean': True, + 'our_jsonb_null': None, + }, + 'our_jsonb_array': ['our_jsonb_arrary_string', 6, {'calm': 'down'}, False, None], + 'our_jsonb_boolean': True, + 'our_jsonb_null': None, + }), # JSONB, + 'our_uuid':'ffffffff-ffff-ffff-ffff-ffffffffffff', # UUID, + 'our_hstore': '"foo"=>"bar","bar"=>"foo","dumdum"=>Null', # HSTORE, + 'our_citext': "aPpLeS", # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/24',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': 'u g l y', # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + }) + + 
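+
+                # Note on the "OUR TS TZ" boundary values: records are inserted
+                # at UTC offset -15:59, so the expected replicated value is the
+                # same instant normalized to UTC. A minimal sketch of that
+                # conversion (illustrative only, not part of the tap or the
+                # test helpers):
+                #
+                #   from datetime import datetime, timedelta, timezone
+                #   tz = timezone(timedelta(hours=-15, minutes=-59))
+                #   ts = datetime(9999, 12, 31, 8, 0, 59, 999999, tzinfo=tz)
+                #   ts.astimezone(timezone.utc).isoformat()
+                #   # -> '9999-12-31T23:59:59.999999+00:00'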
self.expected_records['maximum_boundary_general'] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records['maximum_boundary_general'].update({ + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+15:59', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_char_big': "🥖" + " " * 10485759, + 'our_bit': True, + 'our_cidr': '2001:db8::ff00:42:7879/128', + 'our_jsonb': json.loads(self.inserted_records[-1]['our_jsonb']), + 'our_hstore': {'foo': 'bar', 'bar': 'foo', 'dumdum': None}, + }) + my_keys = set(self.expected_records['maximum_boundary_general'].keys()) + for key in my_keys: + if key.startswith('"'): + del self.expected_records['maximum_boundary_general'][key] + + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # insert a record with valid values for unsupported types + self.inserted_records.append({ + 'id': 9999, + 'unsupported_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'unsupported_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'unsupported_box': '((50, 50), (0, 0))', # BOX, + 'unsupported_bytea': "E'\\255'", # BYTEA, + 'unsupported_circle': '< (3, 1), 4 >', # CIRCLE, + 'unsupported_interval': '178000000 years', # INTERVAL, + 'unsupported_line': '{6, 6, 6}', # LINE, + 'unsupported_lseg': '(0 , 45), (45, 90)', # LSEG, + 'unsupported_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'unsupported_pg_lsn': '16/B374D848', # PG_LSN, + 'unsupported_point': '(1, 2)', # POINT, + 'unsupported_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'unsupported_tsquery': "'fat' & 'rat'", # TSQUERY, + 'unsupported_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'unsupported_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'unsupported_xml': 'bar', # XML) + }) + self.expected_records['unsupported_types'] = { + 'id': 9999, + } + + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # add a record with a text value ~ 10 Megabytes + self.inserted_records.append({ + 'id': 666, + 'our_text': dfr.read_in('text') + }) + self.expected_records['maximum_boundary_text'] = { + 'id': self.inserted_records[-1]['id'], + 'our_text': self.inserted_records[-1]['our_text'], + } + + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + # # 🥖 = 1f956 + # self.inserted_records.append({ + # 'id': 2147483647, # SERIAL PRIMARY KEY, + # 'our_char': "🥖", # CHAR, + # 'our_varchar': "a" * 20971520 # VARCHAR, + # 'our_varchar_big': "🥖" * 5242880base_string, # VARCHAR(10485760), + # 'our_char_big': "🥖", # CHAR(10485760), + + # add a record with a text value ~ 10 Megabytes + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'unsupported_bigserial', + 'unsupported_bit_varying', + 'unsupported_box', + 'unsupported_bytea', + 'unsupported_circle', + 'unsupported_interval', + 
'unsupported_line', + 'unsupported_lseg', + 'unsupported_path', + 'unsupported_pg_lsn', + 'unsupported_point', + 'unsupported_polygon', + 'unsupported_serial', + 'unsupported_smallserial', + 'unsupported_tsquery', + 'unsupported_tsvector', + 'unsupported_txid_snapshot', + 'unsupported_xml', + } + + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_hstore': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'unsupported_bigserial': 'bigint', + 'unsupported_bit_varying': 'bit varying', + 'unsupported_box': 'box', + 'unsupported_bytea': 'bytea', + 'unsupported_circle': 'circle', + 'unsupported_interval': 'interval', + 'unsupported_line': 'line', + 'unsupported_lseg': 'lseg', + 'unsupported_path': 'path', + 'unsupported_pg_lsn': 'pg_lsn', + 'unsupported_point': 'point', + 'unsupported_polygon': 'polygon', + 'unsupported_serial': 'integer', + 'unsupported_smallserial': 'smallint', + 'unsupported_tsquery': 'tsquery', + 'unsupported_tsvector': 'tsvector', + 'unsupported_txid_snapshot': 'txid_snapshot', + 'unsupported_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # TODO Parametrize tests to also run against multiple local (db) timezones + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.set_db_time_zone('America/New_York') + + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = 
connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + # TODO need to enable multiple replication methods (see auto fields test) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : self.default_replication_method}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the persisted schema matches expectations TODO NEED TO GO TRHOUGH SCHEMA MANUALLY STILL + # self.assertEqual(expected_schema, records_by_stream[test_table_name]['schema']) + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # Each record was inserted with a specific test case in mind + for test_case, message in zip(self.expected_records.keys(), messages[1:]): + with self.subTest(test_case=test_case): + + # grab our expected record + expected_record = self.expected_records[test_case] + + # Verify replicated records match our expectations + for field in expected_record.keys(): + with self.subTest(field=field): + + # some data types require adjustments to 
actual values to make valid comparison... + if field == 'our_jsonb': + expected_field_value = expected_record.get(field, '{"MISSING": "FIELD"}') + actual_field_value = json.loads(message['data'].get(field, '{"MISSING": "FIELD"}')) + + self.assertDictEqual(expected_field_value, actual_field_value) + + # but most type do not + else: + + expected_field_value = expected_record.get(field, "MISSING FIELD") + actual_field_value = message['data'].get(field, "MISSING FIELD") + + self.assertEqual(expected_field_value, actual_field_value) + + +SCENARIOS.add(PostgresDatatypes) diff --git a/tests/text_datatype.txt b/tests/text_datatype.txt new file mode 100644 index 0000000..2c08d16 --- /dev/null +++ b/tests/text_datatype.txt @@ -0,0 +1,224384 @@ +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR 
TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 
'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def 
expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. 
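+        # Illustrative sketch only (an editor's assumption, not called by this test): one way
+        # to inspect which fields discovery marked with inclusion 'automatic' before deciding
+        # whether a no-fields sync is feasible for a given replication method. The metadata
+        # layout mirrors the one asserted against in the discovery test.
+        def automatic_fields_from_metadata(annotated_schema):
+            """Return the set of field names whose metadata inclusion is 'automatic'."""
+            return {item['breadcrumb'][1]
+                    for item in annotated_schema.get('metadata', [])
+                    if item.get('breadcrumb')
+                    and item.get('metadata', {}).get('inclusion') == 'automatic'}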
+ + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + 
self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
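+        # Illustrative sketch tied to the floating-point TODOs in the class docstring above
+        # (an editor's assumption; these values are not inserted by this test yet). Note that
+        # float('nan') != float('nan'), so any eventual assertion on our_double/our_real would
+        # need math.isnan rather than a plain equality check.
+        special_float_candidates = {
+            'our_double': [float('nan'), float('inf'), float('-inf')],
+            'our_real': [float('nan'), float('inf'), float('-inf')],
+        }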
db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': 
'08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# 
'24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? + quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod 
+ def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = 
connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
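+        # Illustrative helper (an editor's assumption, not part of tap_tester): if a
+        # field-by-field assertion does fail, truncating the huge string values keeps the
+        # failure message readable instead of dumping ~10 MB of text into the test log.
+        def truncated(value, limit=64):
+            """Return a short printable preview of a potentially huge value."""
+            text = repr(value)
+            return text if len(text) <= limit else text[:limit] + '...'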
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
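+                # For reference, a primary-key entry in the discovered metadata is expected to
+                # look roughly like this (shape inferred from the assertions in this test; the
+                # exact sql-datatype depends on the column):
+                #   {'breadcrumb': ['properties', 'id'],
+                #    'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer'}}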
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
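db_utils.update_record is not shown in this patch; judging only from how it is called below (a cursor, a canonical table name, a primary-key value, and a dict of new column values), a hypothetical sketch of the kind of helper being relied on might look like the following. The body is an assumption; the real db_utils implementation may differ.

from psycopg2.extensions import quote_ident

def update_record(cursor, canon_table_name, record_pk, updated_data):
    # Hypothetical sketch only -- the real db_utils.update_record may differ.
    # Quotes each column name (mixed-case names like "OUR TS TZ" require it)
    # and updates the single row identified by its primary key.
    assignments = ", ".join(
        "{} = %s".format(quote_ident(column, cursor)) for column in updated_data
    )
    sql = "UPDATE {} SET {} WHERE id = %s".format(canon_table_name, assignments)
    cursor.execute(sql, list(updated_data.values()) + [record_pk])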
+                # an existing record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 1
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[0]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+                # a newly inserted record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 5
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[4]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+                # deleting
+                # an existing record
+                record_pk = 2
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+                # a newly inserted record
+                record_pk = 6
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN after various manipulations
+        #----------------------------------------------------------------------
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_3 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(4, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the new table version increased on the third sync
+        self.assertGreater(table_version_3, table_version_2)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+        # NB | This is a little tough to track mentally so here's a breakdown of
+        #      the order of operations by expected records indexes:
+
+        #      Prior to Sync 1
+        #        insert 0, 1, 2
+
+        #      Prior to Sync 2
+        #        No db changes
+
+        #      Prior to Sync 3
+        #        insert 3, 4, 5
+        #        update 0, 4
+        #        delete 1, 5
+
+        #      Resulting Synced Records: 2, 3, 0, 4
+
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+        self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
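# --- Illustrative aside (not part of this diff) -------------------------------
# Shape of the state implied by the assertions above after the first incremental
# sync. Only the keys shown (and the absence of 'lsn') are asserted; the version
# number is a placeholder, and the replication-key value corresponds to record 3,
# which has the highest 'OUR TS TZ' of the three records synced so far.
example_state_after_sync_1 = {
    'currently_syncing': None,
    'bookmarks': {
        'dev-public-postgres_incremental_replication_test': {
            'version': 1234567890123,  # placeholder for the activate_version table_version
            'replication_key': 'OUR TS TZ',
            'replication_key_value': '1997-02-02T07:02:02.722184+00:00',
            # note: no 'lsn' entry for INCREMENTAL replication
        }
    }
}
# --- end aside -----------------------------------------------------------------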
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
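# --- Illustrative aside (not part of this diff) -------------------------------
# Summary of what the assertions below check for sync 2, derived from the data
# manipulations above (ids refer to the table's serial primary key):
expected_upsert_ids_sync_2 = [3, 1, 6]  # ascending 'OUR TS TZ': 1997-..., 2021-... (updated), 2111-...
excluded_ids_sync_2 = [4, 2]            # replication-key values below the saved bookmark
deleted_id_sync_2 = 5                   # deleted before the sync, so never emitted
# --- end aside -----------------------------------------------------------------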
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a higher replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #---------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
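# --- Illustrative aside (not part of this diff) -------------------------------
# For sync 3 the only change since the last bookmark is a hard delete of id 1,
# so the assertions below expect a single upsert: the record whose replication-key
# value equals the saved bookmark (id 6). The deleted id 1 must not reappear.
expected_upsert_ids_sync_3 = [6]
# --- end aside -----------------------------------------------------------------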
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
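# --- Illustrative aside (not part of this diff) -------------------------------
# Pattern used above and repeated below: each inserted dict is reused as its own
# expected record by adding the serial primary key and the '_sdc_deleted_at'
# metadata column that LOG_BASED replication emits, e.g.
#   {'cow_name': 'betty cow', 'cow_age': 21, 'id': 2, '_sdc_deleted_at': None}
# --- end aside -----------------------------------------------------------------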
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discovered metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `chicken_view` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
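The expected_chicken_record asserted above is simply the two fixture rows combined the way chicken_view's JOIN combines them. A minimal sketch of deriving it from the setUp fixtures (a hypothetical helper for illustration, not part of this patch; it assumes the rec_1/rec_2 dicts defined in setUp):

def expected_view_record(rec_1, rec_2, generated_id=1):
    """Build the expected chicken_view upsert record from the two inserted rows."""
    # chicken_view is SELECT * FROM table_1 JOIN table_2 ON table_1.id = table_2.fk_id,
    # so the synced record is the union of both rows plus the SERIAL id from table_1.
    record = {'id': generated_id}
    record.update(rec_1)  # name, size (plus updated_at in the incremental variant)
    record.update(rec_2)  # fk_id, age
    return record

For the fixtures above this yields {'id': 1, 'name': 'fred', 'size': 'big', 'fk_id': 1, 'age': 99}, which is exactly the record compared against messages[1]['data'].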
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
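                # NOTE: unlike the full-table view fixture, this table also gets an updated_at
                # TIMESTAMP WITH TIME ZONE column; test_run below selects it as the view's
                # replication key, which is what the incremental bookmark assertions rely on.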
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
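# NOTE for expected_schemas below: with NUMERIC_PRECISION=12 and NUMERIC_SCALE=2, the
# our_decimal schema works out to multipleOf Decimal('0.01') with exclusive bounds of
# +/- 10**10, i.e. 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE).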
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
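                # NOTE: PostgreSQL's documented range for TIME is 00:00:00 to 24:00:00, so
                # '24:00:00.000000' is itself a valid input; the observation above that it comes
                # back as 00:00:00 would point at a downstream conversion rather than the database.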
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
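+        # NB | A minimal sketch of that per-field subTest pattern (illustrative only;
+        #      the real assertions follow below):
+        #
+        #          for field, expected_value in expected_record.items():
+        #              with self.subTest(field=field):
+        #                  self.assertEqual(expected_value,
+        #                                   actual_record.get(field, "MISSING FIELD"))
+        #
+        #      Each mismatching field is then reported on its own instead of one huge diff.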
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
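+                # For reference, a minimal sketch of the metadata shape these assertions
+                # parse (values below are illustrative, not taken from a real discovery run):
+                #
+                #     [
+                #         {"breadcrumb": [],
+                #          "metadata": {"table-key-properties": ["id"], "row-count": 500,
+                #                       "is-view": False, "schema-name": "public",
+                #                       "database-name": "discovery1"}},
+                #         {"breadcrumb": ["properties", "id"],
+                #          "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
+                #         {"breadcrumb": ["properties", "invalid_xml"],
+                #          "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}},
+                #     ]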
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
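+        # NB | tap_stream_id values follow the "<database>-<schema>-<table>" convention
+        #      (e.g. 'dev-public-postgres_full_table_replication_array_test'), which is why
+        #      discovery output is filtered against expected_check_streams() above.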
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
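
# A note on the insert step that follows: each record dict mixes ordinary column
# names with quote_ident(...) keys for the quoted, mixed-case columns, and
# db_utils.insert_record turns such a dict into an INSERT. db_utils itself is not
# shown in this patch series; the sketch below assumes it behaves like the
# standalone insert_record helper defined later in the series (array test).

from psycopg2.extensions import quote_ident

def insert_record_sketch(cursor, table_name, data):
    # Sort keys for a deterministic column order; quote_ident(...) keys are
    # already safely double-quoted, plain keys are used as-is.
    keys = sorted(data.keys())
    values = [data[key] for key in keys]
    columns_sql = ", ".join(keys)
    placeholders = ", ".join(["%s"] * len(keys))
    insert_sql = "INSERT INTO {} ( {} ) VALUES ( {} )".format(
        quote_ident(table_name, cursor), columns_sql, placeholders)
    cursor.execute(insert_sql, values)
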
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
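
# The update and delete steps below go through
# db_utils.update_record(cur, canon_table_name, record_pk, updated_data) and
# db_utils.delete_record(cur, canon_table_name, record_pk). Those helpers are not
# shown anywhere in this series; a hypothetical sketch of what they presumably
# reduce to -- parameterized statements keyed on the integer primary key:

from psycopg2.extensions import quote_ident

def update_record_sketch(cursor, canon_table_name, record_pk, data):
    # Quote every column so mixed-case names such as "OUR TS TZ" survive.
    assignments = ", ".join(
        "{} = %s".format(quote_ident(col, cursor)) for col in data)
    sql = "UPDATE {} SET {} WHERE id = %s".format(canon_table_name, assignments)
    cursor.execute(sql, list(data.values()) + [record_pk])

def delete_record_sketch(cursor, canon_table_name, record_pk):
    sql = "DELETE FROM {} WHERE id = %s".format(canon_table_name)
    cursor.execute(sql, [record_pk])
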
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after vairous manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
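
# Before the discovery checks that follow, it is worth spelling out what the
# hard-coded tap_stream_id in expected_check_streams() encodes: it follows a
# <dbname>-<schema>-<table> pattern, assuming TAP_POSTGRES_DBNAME points at the
# 'dev' database that the db_utils.ensure_db('dev') call in setUp expects.

expected_stream_id = "-".join(['dev', test_schema_name, test_table_name])
assert expected_stream_id == 'dev-public-postgres_incremental_replication_test'
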
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
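
# A note on the bookmark assertion just above: expected_ts_tz converts the
# localized fixture timestamp to UTC and renders it as a fixed-width ISO-style
# string, so the stored replication_key_value after sync 1 is the 1997 value
# shown below. Because every value carries the same '+00:00' suffix and field
# widths, plain string comparison orders them chronologically, which is what
# the assertLess checks on the raw message data rely on.

import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
our_ts_tz = nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))
as_utc = our_ts_tz.astimezone(pytz.utc)
print(datetime.datetime.strftime(as_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00"))
# -> 1997-02-02T07:02:02.722184+00:00  (New York is UTC-5 in February)
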
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
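
# One way to reason about which of the six expected records should appear in
# this second sync: rows whose "OUR TS TZ" value is greater than or equal to
# the bookmark saved by sync 1 (the 1997 record) and which still exist at sync
# time, emitted in ascending replication-key order. A sketch of that filter,
# meant to be read in the context of test_run -- self.expected_records is the
# list built in setUp, deleted_ids is an illustrative name:

bookmark_value = '1997-02-02T07:02:02.722184+00:00'   # replication_key_value from sync 1
deleted_ids = {5}                                      # row deleted before this sync

expected_in_sync_2 = sorted(
    (rec for rec in self.expected_records
     if rec['OUR TS TZ'] >= bookmark_value and rec['id'] not in deleted_ids),
    key=lambda rec: rec['OUR TS TZ'])
# -> ids 3, 1, 6: the previously bookmarked row, the row updated to a 2021
#    value, and the newly inserted 2111 row, matching the upserts checked below.
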
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #--------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
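
# The single upsert in this third sync is the previously bookmarked row itself.
# The test's own assertions show why: the replication-key comparison is
# inclusive, so the row whose value equals the stored bookmark is re-emitted
# even though nothing changed, while the deleted id-1 row (whose updated key was
# below the bookmark anyway) cannot reappear. Conceptually -- illustrative SQL,
# not lifted from tap-postgres internals:

replication_key = 'OUR TS TZ'
incremental_sql = (
    'SELECT * FROM "public"."postgres_incremental_replication_test" '
    'WHERE "{0}" >= %s ORDER BY "{0}" ASC'.format(replication_key))
# Bound to the stored replication_key_value (the 2111 timestamp), only the
# bookmarked id-6 row still satisfies the predicate, hence exactly one upsert.
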
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
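+                        # recreate the wal2json replication slot so this run replicates
+                        # from a fresh LSN instead of replaying WAL left over from a
+                        # previous test run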
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
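+                # third cow and chicken; ids come from the SERIAL primary key, so the
+                # expected 'id' values set below assume freshly created tables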
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
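+                # check for a 'stitch' slot left over from a previous run; it is dropped
+                # and recreated below so logical replication starts from the current LSN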
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
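+        # the update below sets our_money to '$56.811' (money rounds to two decimal
+        # places, hence the expected '$56.81') and sets our_decimal/our_double to 'NaN'
+        # and our_real to '+Infinity'; JSON cannot represent NaN or Infinity, so those
+        # columns are expected to be emitted as None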
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
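+        # The expected record above documents how the inserted values come back
+        # through logical replication: bit '1' -> True, the hstore string ->
+        # a dict, money '$412.1234' -> '$412.12', and the /32 inet value is
+        # reported without its prefix length.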
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
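+        # The UPDATE above wrote 'NaN' / '+Infinity' into the numeric columns
+        # and '$56.811' into money; the expected record below shows those
+        # numerics arriving as None (JSON has no NaN/Infinity representation)
+        # and money rounded to '$56.81'.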
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
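+        # Views are discovered with empty 'table-key-properties' (asserted in
+        # the metadata check above), so this test designates 'id' as the key
+        # through 'view-key-properties' in the replication metadata.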
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
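+                # The second table carries fk_id/age; the view created below joins
+                # it to the first table so updated_at (from the first table) can
+                # serve as the incremental replication key on the view.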
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
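+                # NB | postgres itself accepts a literal '24:00:00' for TIME columns, but per the
+                #      TODO above that input does not appear to round-trip through the tap, so the
+                #      largest representable value strictly before midnight is pinned here instead.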
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
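+        # e.g. a mismatch on a single field such as our_char_big is then surfaced as its own
+        # small per-field subTest failure rather than one multi-megabyte dict diff.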
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
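+                # For reference, each per-field metadata entry consumed by the assertions below is
+                # expected to look roughly like the following (a sketch, not an exact record):
+                #   {'breadcrumb': ['properties', 'id'],
+                #    'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer', 'selected-by-default': True}}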
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_1 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_1, bookmark['version'])
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN and get the same 3 records
+        #----------------------------------------------------------------------
+
+        # run sync job 2 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_2 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('activate_version', messages[3]['action'])
+
+        # verify the new table version increased on the second sync
+        self.assertGreater(table_version_2, table_version_1)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_2, bookmark['version'])
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN following various manipulations to the data
+        #----------------------------------------------------------------------
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # NB | We will perform the following actions prior to the next sync:
+                # [Action (EXPECTED RESULT)]
+
+                # Insert a record
+                # Insert a record to be updated prior to sync
+                # Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+                # Update an existing record
+                # Update a newly inserted record
+
+                # Delete an existing record
+                # Delete a newly inserted record
+
+                # inserting...
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+
+                # an existing record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 1
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[0]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+                # a newly inserted record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 5
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[4]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+                # deleting
+                # an existing record
+                record_pk = 2
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+                # a newly inserted record
+                record_pk = 6
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN after various manipulations
+        #----------------------------------------------------------------------
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_3 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(4, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the new table version increased on the third sync
+        self.assertGreater(table_version_3, table_version_2)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+        # NB | This is a little tough to track mentally so here's a breakdown of
+        # the order of operations by expected records indexes:
+
+        # Prior to Sync 1
+        # insert 0, 1, 2
+
+        # Prior to Sync 2
+        # No db changes
+
+        # Prior to Sync 3
+        # insert 3, 4, 5
+        # update 0, 4
+        # delete 1, 5
+
+        # Resulting Synced Records: 2, 3, 0, 4
+
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+        self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+        
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(table_version, bookmark['version'])
+        self.assertEqual(expected_replication_key, bookmark['replication_key'])
+        self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
+
+        
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
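+        # Index guide for the assertions below: expected_records[2] is the record
+        # that was at the previous bookmark, expected_records[0] was updated to a
+        # higher replication-key value, and expected_records[5] was newly inserted
+        # with a higher value. expected_records[3] (inserted below the bookmark),
+        # expected_records[1] (updated to a value below the bookmark), and
+        # expected_records[4] (inserted above the bookmark, then deleted) must not
+        # be replicated.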
+
+        # verify the first record was the bookmarked record from the previous sync
+        self.assertDictEqual(self.expected_records[2], messages[1]['data'])
+
+        # verify the expected updated record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[0], messages[2]['data'])
+
+        # verify the expected inserted record with a lower replication-key value was NOT replicated
+        actual_record_ids = [message['data']['id'] for message in messages[1:]]
+        expected_record_id = self.expected_records[3]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the deleted record with a higher replication-key value was NOT replicated
+        expected_record_id = self.expected_records[4]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected updated record with a lower replication-key value was NOT replicated
+        expected_record_id = self.expected_records[1]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected inserted record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[5], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # get bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify the bookmarked state matches our expectations
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(bookmark['version'], table_version)
+        self.assertEqual(bookmark['replication_key'], expected_replication_key)
+        self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key])
+
+        #----------------------------------------------------------------------
+        # run sync AGAIN after deleting a record and expect 1 record (the previous bookmark)
+        #----------------------------------------------------------------------
+
+        # Delete a pre-existing record from the database
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # delete a record with a lower replication key than the previous sync
+                record_pk = 1
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(1, record_count_by_stream[test_table_name])
+
+        # verify messages match our expectations
+        self.assertEqual(2, len(messages))
+        self.assertEqual(messages[0]['action'], 'activate_version')
+        self.assertEqual(messages[1]['action'], 'upsert')
+        self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version)
+
+        # verify replicated records meet our expectations...
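+        # Only expected_records[5] should appear in this sync: the deleted row
+        # (id 1) is simply absent, and because the saved bookmark is inclusive of
+        # the replication-key value, the previously bookmarked record is emitted
+        # again as the single upsert.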
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+
+        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
+        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_cows_1 = bookmark_cows['lsn']
+        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
+
+        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
+        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_chickens_1 = bookmark_chickens['lsn']
+        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job again after adding records
+        #----------------------------------------------------------------------
+        print("inserting 2 more cows and 2 more chickens")
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor() as cur:
+                # insert another cow
+                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
+                insert_record(cur, test_table_name_cows, self.cows_rec_2)
+                # update that cow's expected values
+                self.cows_rec_2['id'] = 2
+                self.cows_rec_2['_sdc_deleted_at'] = None
+
+                # insert another chicken
+                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
+                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
+                # update that chicken's expected values
+                self.chicken_rec_2['id'] = 2
+                self.chicken_rec_2['_sdc_deleted_at'] = None
+
+                # and repeat...
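+                # As above, each inserted dict doubles as the expected record once
+                # the serial 'id' and the '_sdc_deleted_at' field emitted under
+                # log-based replication are filled in by hand.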
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
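# Editor's note: the bookmark checks repeated after every sync in this test
# (the lsn is present, never moves backwards, and the table_version is unchanged)
# could be expressed once. A minimal sketch of such a helper, assuming the same
# state shape returned by menagerie.get_state(); the name `assert_lsn_bookmark`
# is hypothetical and is not part of this patch or the tap-tester framework.
def assert_lsn_bookmark(test, state, tap_stream_id, expected_version, previous_lsn=None):
    """Return the stream's lsn after asserting it exists, did not regress,
    and that the table version is unchanged."""
    test.assertIsNone(state['currently_syncing'],
                      msg="expected state's currently_syncing to be None")
    bookmark = state['bookmarks'][tap_stream_id]
    test.assertIsNotNone(bookmark['lsn'],
                         msg="expected bookmark for stream to have an lsn")
    if previous_lsn is not None:
        test.assertGreaterEqual(bookmark['lsn'], previous_lsn)
    test.assertEqual(bookmark['version'], expected_version,
                     msg="expected bookmark for stream to match version")
    return bookmark['lsn']
# Usage would mirror the assertions above, e.g.
# lsn_5 = assert_lsn_bookmark(self, state,
#                             'dev-public-postgres_logical_replication_test',
#                             table_version, lsn_4)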
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
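# Editor's note: the insert verification above builds an expected record and
# compares it to the emitted message in two steps (key sets first, then value
# by value). A minimal sketch of that comparison as a reusable helper, assuming
# the message layout produced by runner.get_records_from_target_output(); the
# name `assert_record_matches` is hypothetical, not a tap-tester API.
def assert_record_matches(test, actual, expected):
    """Fail on missing or extra columns before comparing individual values."""
    test.assertEqual(set(actual.keys()), set(expected.keys()),
                     msg="record keys are wrong: {}".format(
                         set(actual.keys()).symmetric_difference(set(expected.keys()))))
    for key, value in actual.items():
        test.assertEqual(value, expected[key],
                         msg="{} != {} for key {}".format(value, expected[key], key))
# e.g. assert_record_matches(self, actual_record_1, expected_inserted_record)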
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
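# Editor's note: the UPDATE above deliberately pushes edge-case values through
# logical replication, and the expected record asserted below encodes how they
# are expected to come back. A short summary using the literals from this test;
# the dict is illustrative only and is not used by the test itself.
update_edge_case_expectations = {
    "our_decimal = 'NaN'":    None,      # non-finite NUMERIC surfaces as null
    "our_double = 'NaN'":     None,      # non-finite DOUBLE PRECISION surfaces as null
    "our_real = '+Infinity'": None,      # non-finite REAL surfaces as null
    "our_money = '$56.811'":  '$56.81',  # money is stored and emitted to two decimal places
}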
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
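# --- A small sketch, assuming the message-list shape checked above: a FULL_TABLE
# --- sync of the view is expected to open and close with 'activate_version' and
# --- carry only 'upsert' messages in between, so the pattern can be verified
# --- generically. The example record mirrors the one asserted in this test.
def assert_full_table_message_pattern(messages):
    assert messages[0]['action'] == 'activate_version'
    assert messages[-1]['action'] == 'activate_version'
    assert all(m['action'] == 'upsert' for m in messages[1:-1])

assert_full_table_message_pattern([
    {'action': 'activate_version'},
    {'action': 'upsert', 'data': {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size': 'big'}},
    {'action': 'activate_version'},
])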
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
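+        # NB | Illustrative sketch of the per-field pattern used below (variable names
+        #      'expected_record'/'actual_record' are stand-ins for the dicts compared here):
+        #          with self.subTest(field='our_char_big'):
+        #              self.assertEqual(expected_record['our_char_big'],
+        #                               actual_record['our_char_big'])
+        #      A mismatch then surfaces as one focused subTest failure per field rather
+        #      than a single assertDictEqual diff spanning the ~10 MB char/varchar values.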
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
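+                # NB | Because of the behavior described above, expected_replication_keys
+                #      is left as the empty set and the automatic-inclusion assertion below
+                #      expects only the primary key field(s) for this stream.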
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after various manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the third sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
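# NB | Context only (an illustration, not something this test asserts): with 'OUR TS TZ' selected as the + #      replication key, the next INCREMENTAL sync is expected to resume from the bookmarked + #      replication_key_value, roughly equivalent to filtering the table with + #      WHERE "OUR TS TZ" >= <bookmarked replication_key_value> ORDER BY "OUR TS TZ" ASC, + #      so the bookmarked record is re-emitted and rows below the bookmark are not replicated. +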
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a higher replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #---------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
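+                            # get_test_connection(..., True) presumably opens a replication connection, whose
+                            # cursor exposes create_replication_slot(); the call below is roughly the SQL
+                            #   SELECT * FROM pg_create_logical_replication_slot('stitch_postgres', 'wal2json');
+                            # wal2json is the output plugin the tap decodes for LOG_BASED replication.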
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+
+        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
+        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_cows_1 = bookmark_cows['lsn']
+        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
+
+        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
+        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_chickens_1 = bookmark_chickens['lsn']
+        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job again after adding records
+        #----------------------------------------------------------------------
+        print("inserting 2 more cows and 2 more chickens")
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor() as cur:
+                # insert another cow
+                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
+                insert_record(cur, test_table_name_cows, self.cows_rec_2)
+                # update that cow's expected values
+                self.cows_rec_2['id'] = 2
+                self.cows_rec_2['_sdc_deleted_at'] = None
+
+                # insert another chicken
+                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
+                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
+                # update that chicken's expected values
+                self.chicken_rec_2['id'] = 2
+                self.chicken_rec_2['_sdc_deleted_at'] = None
+
+                # and repeat...
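+                # The third cow and chicken follow the same pattern: insert the row, then
+                # extend the expected dict with the SERIAL-assigned id and a null
+                # _sdc_deleted_at so it can be compared field-for-field against the upsert
+                # message emitted by the next log-based sync.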
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
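+                # Rebuild fixtures from scratch: recreate the 'stitch' replication slot,
+                # drop any leftover copy of the test table, create a table covering a wide
+                # range of column types, and seed it with four rows.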
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
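+                 # tap_stream_id format: <database>-<schema>-<table>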
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
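+        # The UPDATE below writes edge-case values: 'NaN' and '+Infinity' for the numeric
+        # columns (the test expects them back as null, since JSON numbers cannot express
+        # them) and '$56.811' for money, which Postgres rounds to '$56.81'.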
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
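+        # --- Editorial sketch (added commentary, not part of the original patch):
+        # logical replication bookmarks should only ever move forward. Keeping the
+        # LSNs observed so far in a list makes that expectation explicit;
+        # 'observed_lsns' is a name introduced here purely for illustration.
+        observed_lsns = [lsn_1, lsn_2]
+        self.assertEqual(observed_lsns, sorted(observed_lsns),
+                         msg="expected logical replication lsn bookmarks to be non-decreasing across syncs")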
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
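+        # --- Editorial note (added commentary, not part of the original patch):
+        # the UPDATE above writes 'NaN' and '+Infinity' into numeric columns.
+        # Strict JSON has no encoding for those values, which is presumably why the
+        # expectations below treat our_decimal, our_real and our_double as null,
+        # and why our_money '$56.811' comes back rounded by Postgres to '$56.81'.
+        # A minimal illustration of the JSON limitation:
+        with self.assertRaises(ValueError):
+            json.dumps(float('nan'), allow_nan=False)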
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
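+# --- Editorial sketch (added commentary, not part of the original patch): the
+# fixture DDL above is assembled with quote_ident() plus str.format(). The
+# psycopg2.sql composition API (psycopg2 2.8+ for multi-part Identifier) builds
+# the same CREATE VIEW statement; this helper is illustrative only and is not
+# called by the test. Usage would be: cur.execute(build_chicken_view_ddl())
+def build_chicken_view_ddl():
+    from psycopg2 import sql  # local import so the sketch stays self-contained
+    return sql.SQL(
+        "CREATE VIEW {view} AS (SELECT * FROM {t1} JOIN {t2} ON {t1}.id = {t2}.fk_id)"
+    ).format(view=sql.Identifier(test_view),
+             t1=sql.Identifier(test_schema_name, test_table_name_1),
+             t2=sql.Identifier(test_schema_name, test_table_name_2))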
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
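+            # --- Editorial sketch (added, not part of the original patch): this
+            # fixture feeds the incremental-replication-on-updated_at scenario
+            # below, and a production table would normally index its replication
+            # key. Harmless for the fixture; the index name is invented here.
+            cur.execute("CREATE INDEX {} ON {} (updated_at)".format(
+                quote_ident(test_table_name_1 + "_updated_at_idx", cur),
+                canonicalized_table_name(test_schema_name, test_table_name_1, cur)))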
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
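            # Note on the timestamp ceiling used above: Postgres itself accepts timestamps
            # out to 294276-12-31, but Python's datetime (and therefore psycopg2 here) caps
            # at datetime.MAXYEAR == 9999, so 9999-12-31 23:59:59.999999 is the largest
            # value this fixture can exercise.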
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
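        # The per-field loops below follow the subTest pattern described above. Purely as an
        # illustrative alternative (hypothetical helper, not used by this test), a single dict
        # comparison could also be kept readable by truncating the huge padded CHAR/TEXT values
        # before diffing:
        def _display_copy(record, limit=64):
            """Return a copy of record with long string values shortened for failure output."""
            return {field: (value[:limit] + '...') if isinstance(value, str) and len(value) > limit
                    else value
                    for field, value in record.items()}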
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
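                # For reference on the shape being inspected here: menagerie returns stream
                # metadata as a list of {"breadcrumb": [...], "metadata": {...}} entries, with
                # an empty breadcrumb for the stream-level entry and ["properties", <field>]
                # for each column, so the "automatic" fields are simply the second breadcrumb
                # element of every entry whose inclusion is "automatic". A minimal sketch
                # against a hypothetical metadata list:
                sample_metadata = [
                    {"breadcrumb": [], "metadata": {"table-key-properties": ["id"]}},
                    {"breadcrumb": ["properties", "id"], "metadata": {"inclusion": "automatic"}},
                    {"breadcrumb": ["properties", "our_varchar"], "metadata": {"inclusion": "available"}},
                ]
                self.assertEqual(
                    {"id"},
                    {entry["breadcrumb"][1] for entry in sample_metadata
                     if entry["metadata"].get("inclusion") == "automatic"})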
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
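+ # NB | tap_stream_ids produced by discovery are expected to follow the
+ #      '<dbname>-<schema>-<table>' naming used in expected_check_streams(), e.g.
+ #      'dev-public-postgres_full_table_replication_array_test' for this test.
+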
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
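+ # NB | a full-table sync is expected to bracket the upsert(s) with activate_version
+ #      messages (one before and one after the records), which is why 3 messages are
+ #      asserted above for a single inserted row.
+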
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
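+ # NB | db_utils.insert_record / update_record / delete_record are helpers that live in
+ #      db_utils.py and are not part of this patch. As a rough sketch (an assumption,
+ #      modeled on the local insert_record defined for the arrays test above),
+ #      insert_record builds a parameterized INSERT from the record dict:
+ #
+ #          columns = ", ".join(record.keys())
+ #          placeholders = ", ".join(["%s"] * len(record))
+ #          cur.execute("INSERT INTO {} ({}) VALUES ({})".format(
+ #              quote_ident(test_table_name, cur), columns, placeholders),
+ #              list(record.values()))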
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
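+ # NB | db_utils.update_record and db_utils.delete_record are likewise db_utils.py helpers
+ #      not shown in this patch; they are assumed to issue statements keyed on the primary
+ #      key, roughly:
+ #
+ #          UPDATE <canon_table_name> SET "OUR TS TZ" = %s, our_double = %s, our_money = %s
+ #                 WHERE id = <record_pk>
+ #          DELETE FROM <canon_table_name> WHERE id = <record_pk>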
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after vairous manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
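+ # NB | On the next sync, incremental replication is expected to re-select any row whose
+ #      replication-key value ("OUR TS TZ") is greater than or equal to the bookmarked
+ #      replication_key_value, so the row that set the bookmark is replicated again,
+ #      while rows inserted or updated to a lower value are skipped.
+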
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
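+        # Recap of what sync 2 should emit, in replication-key order:
+        #   messages[1] - expected_records[2], the record bookmarked by sync 1 (inclusive bookmark)
+        #   messages[2] - expected_records[0], the pre-existing record updated to a value above the bookmark
+        #   messages[3] - expected_records[5], the newly inserted record above the bookmark
+        # The insert below the bookmark (expected_records[3]), the update below the bookmark
+        # (expected_records[1]), and the insert that was subsequently deleted (expected_records[4])
+        # should all be absent.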
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #--------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
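+        # Recap of what sync 3 should emit: only the record bookmarked by sync 2
+        # (expected_records[5]) is re-synced. Record 1 was deleted beforehand, but
+        # incremental replication emits no delete events, and its replication-key value
+        # sat below the bookmark anyway, so the deletion simply is not reflected here.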
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
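+                        # ensure a fresh wal2json slot for this run (any leftover slot was dropped above)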
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
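+                # (the next sync reads only changes written after the bookmarked LSN, so it
+                #  should emit just these four new rows - two per stream; log-based replication
+                #  adds _sdc_deleted_at to every record and the SERIAL column supplies the id,
+                #  which is why those keys are patched into the expected records above and below)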
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
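+        # NOTE: the UPDATE below writes 'NaN'/'+Infinity' numerics and an over-precise
+        # money value; the assertions that follow expect the replicated record to carry
+        # None for those numeric columns and our_money rounded to two decimal places.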
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
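+        # NOTE: the DELETE below is expected to surface as a single upsert message
+        # with _sdc_deleted_at populated, the bookmarked lsn moving forward, and the
+        # table_version staying unchanged (all asserted further down).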
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
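+        # NOTE: this sync should contain exactly one upsert message for the updated row;
+        # expected_updated_rec below reflects the NaN/Infinity columns coming back as
+        # None and our_money rounded to '$56.81'.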
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
+
+ # verify that persisted streams have the correct properties
+ chicken_catalog = found_catalogs[0]
+
+ self.assertEqual('chicken_view', chicken_catalog['stream_name'])
+ print("discovered streams are correct")
+
+ print('checking discovered metadata for ROOT-CHICKEN_VIEW')
+ md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata']
+
+ self.assertEqual(
+ {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []},
+ ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True},
+ ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
+ ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True},
+ ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
+ ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}},
+ metadata.to_map(md))
+
+
+ # 'ID' selected as view-key-properties
+ replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}]
+
+ connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog,
+ menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']),
+ replication_md)
+
+ # clear state
+ menagerie.set_state(conn_id, {})
+
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+
+ # verify tap and target exit codes
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ record_count_by_stream = runner.examine_target_output_file(self,
+ conn_id,
+ self.expected_sync_streams(),
+ self.expected_pks())
+
+
+ self.assertEqual(record_count_by_stream, { 'chicken_view': 1})
+ records_by_stream = runner.get_records_from_target_output()
+
+ table_version = records_by_stream['chicken_view']['table_version']
+ self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version')
+ self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert')
+ self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version')
+
+ # verifications about individual records
+ for stream, recs in records_by_stream.items():
+ # verify the persisted schema was correct
+ self.assertEqual(recs['schema'],
+ expected_schemas[stream],
+ msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))
+
+ actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data']
+
+ expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'}
+ self.assertEqual(actual_chicken_record,
+ expected_chicken_record,
+ msg="Expected `chicken_view` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record))
+
+ print("records are correct")
+
+ # verify state and bookmarks
+ state = menagerie.get_state(conn_id)
+
+ chicken_bookmark = state['bookmarks']['postgres-public-chicken_view']
+ self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+ self.assertEqual(chicken_bookmark['version'], table_version,
+ msg="expected bookmark for stream ROOT-CHICKEN to match version")
+
+ 
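+# Register this scenario with tap-tester so the runner picks it up, matching the pattern used by the other suites in this patch.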
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
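+                # companion table: supplies fk_id/age, which chicken_view below joins to table 1 on id = fk_id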
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
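+ # NB | The "MISSING FIELD" default in the lookups below distinguishes a field
+ #      that never made it into the replicated record from one that arrived as
+ #      NULL (None), so both failure modes show up clearly in the subTest output.
+ #      Illustrative only (short() is a hypothetical helper, not executed here):
+ #      the same per-field comparison with truncated failure messages could look like
+ #
+ #          def short(value, width=60):
+ #              text = repr(value)
+ #              return text if len(text) <= width else text[:width] + '...'
+ #
+ #          for key, expected in expected_record_mins.items():
+ #              with self.subTest(field=key):
+ #                  actual = messages[1]['data'].get(key, "MISSING FIELD")
+ #                  self.assertEqual(expected, actual, msg="{}: {} != {}".format(
+ #                      key, short(expected), short(actual)))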
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
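+ # For reference, the annotated-schema metadata consumed above and below is
+ # assumed to have roughly this shape (values illustrative, not asserted verbatim):
+ #
+ #     [{'breadcrumb': [],
+ #       'metadata': {'table-key-properties': ['id'],
+ #                    'schema-name': 'public',
+ #                    'database-name': 'discovery1',
+ #                    'row-count': 500,
+ #                    'is-view': False, ...}},
+ #      {'breadcrumb': ['properties', 'id'],
+ #       'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer'}},
+ #      {'breadcrumb': ['properties', 'invalid_xml'],
+ #       'metadata': {'inclusion': 'unsupported', 'sql-datatype': 'xml'}},
+ #      ...]
+ #
+ # i.e. the only 'automatic' fields are the primary keys, and the 'unsupported'
+ # inclusion is what drives the expected_unsupported_fields assertion.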
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
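+ #      (The three inserts below get ids 4, 5 and 6, i.e. expected_records
+ #       indexes 3, 4 and 5.  Ids 2 and 6 are deleted again before the next
+ #       sync, so only ids 1, 3, 4 and 5 should be replicated by sync 3;
+ #       that is the 4-record breakdown asserted further down.)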
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after various manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the third sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert +
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
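+ # (for orientation: the catalog entry for this table is expected to look roughly like
+ #  {'tap_stream_id': 'dev-public-postgres_incremental_replication_test',
+ #   'stream_name': 'postgres_incremental_replication_test', ...};
+ #  this is an illustrative shape only -- the assertions below are the authoritative checks)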
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + +
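+ # NB | The bookmark verified above is what drives the next incremental sync: only rows
+ #      whose replication-key value is at or above the saved 'replication_key_value' are
+ #      selected. Conceptually (an illustration only, not the tap's literal query) the
+ #      next sync behaves like:
+ #        SELECT * FROM postgres_incremental_replication_test
+ #         WHERE "OUR TS TZ" >= <bookmarked replication_key_value>
+ #         ORDER BY "OUR TS TZ";
+ #      Because the comparison is inclusive, the previously bookmarked record is
+ #      re-replicated on the next sync, which the sync 2 assertions below rely on.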
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
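+ # (expected upserts for sync 2, in ascending replication-key order:
+ #    expected_records[2] -- the previously bookmarked record, re-replicated because the
+ #                           bookmark comparison is inclusive of the saved value
+ #    expected_records[0] -- updated to a replication-key value above the bookmark
+ #    expected_records[5] -- inserted with a replication-key value above the bookmark
+ #  the lower-than-bookmark insert and update, and the deleted record, are excluded)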
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a higher replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #---------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous bookmark + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations...
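+ # (only one upsert is expected for sync 3: incremental replication reads from the current
+ #  table state and does not emit deletes, and after removing id 1 the only remaining row
+ #  at or above the sync 2 bookmark is the previously bookmarked record itself, which is
+ #  re-replicated because the bookmark comparison is inclusive)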
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
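+ # a third cow and a third chicken, again recording the expected 'id' and '_sdc_deleted_at' values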
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
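+ # look for an existing 'stitch' replication slot; drop and recreate it before rebuilding the test table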
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
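+ # the update below also exercises money rounding and NaN/Infinity handling for the numeric columns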
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
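Editor's note: the bookmark checks after each sync above follow the same pattern: currently_syncing is None, the stream's bookmark carries an lsn that never moves backwards, and the table version does not change under logical replication. A helper along these lines could state that once per sync; this is only a sketch against the state shape used in this test (menagerie.get_state returning 'currently_syncing' and 'bookmarks'), not part of the patch:

    def assert_logical_bookmark(self, state, tap_stream_id, table_version, prev_lsn=None):
        """Check the per-stream bookmark after a LOG_BASED sync and return its lsn."""
        self.assertIsNone(state['currently_syncing'])
        bookmark = state['bookmarks'][tap_stream_id]
        self.assertIsNotNone(bookmark['lsn'])
        if prev_lsn is not None:
            # the lsn may stay the same, but it must never move backwards
            self.assertGreaterEqual(bookmark['lsn'], prev_lsn)
        # table_version does NOT change across logical-replication syncs
        self.assertEqual(bookmark['version'], table_version)
        return bookmark['lsn']

Each block above would then reduce to something like lsn_2 = self.assert_logical_bookmark(menagerie.get_state(conn_id), 'dev-public-postgres_logical_replication_test', table_version, lsn_1).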
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
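Editor's note: the UPDATE issued above writes 'NaN' into our_decimal and our_double and '+Infinity' into our_real, and the expectations that follow treat all three as None. A plausible reading is that these special float values cannot be represented as JSON numbers, so the tap emits null for them. A minimal illustration (an editorial sketch, not part of the patch):

    import json

    # NaN / Infinity are not valid JSON numbers; strict serialization rejects them,
    # which is consistent with the expected records carrying None for these columns.
    try:
        json.dumps(float('nan'), allow_nan=False)
    except ValueError as err:
        print(err)  # e.g. "Out of range float values are not JSON compliant"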
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
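Editor's note: the full-table sync of the view is asserted above as an activate_version message, a single upsert, and a closing activate_version. Where that envelope check recurs, a small helper could make the intent explicit; an editorial sketch over the records_by_stream structure returned by runner.get_records_from_target_output(), not part of the patch:

    def assert_full_table_envelope(self, messages, expected_upserts):
        """Full-table syncs bracket their upserts with activate_version messages."""
        self.assertEqual(messages[0]['action'], 'activate_version')
        self.assertEqual(messages[-1]['action'], 'activate_version')
        upserts = [m for m in messages[1:-1] if m['action'] == 'upsert']
        self.assertEqual(len(upserts), expected_upserts)

For the single joined row in this test the call would be self.assert_full_table_envelope(records_by_stream['chicken_view']['messages'], 1).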
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
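+ # NB | PostgreSQL itself accepts '24:00:00' as the maximum TIME value, but per the note
+ #      above it appears to round-trip as '00:00:00', so '23:59:59.999999' is used as the
+ #      practical maximum in this record.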
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_1 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_1, bookmark['version'])
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN and get the same 3 records
+        #----------------------------------------------------------------------
+
+        # run sync job 2 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_2 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('activate_version', messages[3]['action'])
+
+        # verify the new table version increased on the second sync
+        self.assertGreater(table_version_2, table_version_1)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_2, bookmark['version'])
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN following various manipulations to the data
+        #----------------------------------------------------------------------
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # NB | We will perform the following actions prior to the next sync:
+                #      [Action (EXPECTED RESULT)]
+
+                #      Insert a record
+                #      Insert a record to be updated prior to sync
+                #      Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+                #      Update an existing record
+                #      Update a newly inserted record
+
+                #      Delete an existing record
+                #      Delete a newly inserted record
+
+                # inserting...
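+                # NB | The our_store values below use Postgres hstore's key=>value
+                #      literal syntax (e.g. 'dances=>"floor",name=>"betty"'); the
+                #      expected records further down assume the tap emits hstore
+                #      columns as plain JSON objects such as
+                #      {"name": "betty", "dances": "floor"}.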
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
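+                # NB | db_utils.update_record / db_utils.delete_record are not shown in
+                #      this patch; a rough sketch of the assumed helpers (hypothetical,
+                #      not the actual db_utils implementation) would look like:
+                #
+                #          def update_record(cursor, table, pk, data):
+                #              assignments = ", ".join(
+                #                  "{} = %s".format(quote_ident(key, cursor)) for key in data)
+                #              cursor.execute("UPDATE {} SET {} WHERE id = %s".format(table, assignments),
+                #                             list(data.values()) + [pk])
+                #
+                #          def delete_record(cursor, table, pk):
+                #              cursor.execute("DELETE FROM {} WHERE id = %s".format(table), (pk,))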
+                # an existing record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 1
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[0]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+                # a newly inserted record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 5
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[4]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+                # deleting
+                # an existing record
+                record_pk = 2
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+                # a newly inserted record
+                record_pk = 6
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN after various manipulations
+        #----------------------------------------------------------------------
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_3 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(4, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the new table version increased on the third sync
+        self.assertGreater(table_version_3, table_version_2)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+        # NB | This is a little tough to track mentally so here's a breakdown of
+        #      the order of operations by expected records indexes:
+
+        #      Prior to Sync 1
+        #      insert 0, 1, 2
+
+        #      Prior to Sync 2
+        #      No db changes
+
+        #      Prior to Sync 3
+        #      insert 3, 4, 5
+        #      update 0, 4
+        #      delete 1, 5
+
+        #      Resulting Synced Records: 2, 3, 0, 4
+
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[2], messages[0]['data'])    # existing insert
+        self.assertDictEqual(self.expected_records[3], messages[1]['data'])    # new insert
+        
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
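+        # NB | expected_check_streams() above hard-codes the tap_stream_id as
+        #      'dev-public-postgres_incremental_replication_test'; that appears to follow
+        #      a <database>-<schema>-<table> naming scheme ('dev' database, 'public'
+        #      schema), which is the assumption the discovery checks below rely on.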
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(table_version, bookmark['version'])
+        self.assertEqual(expected_replication_key, bookmark['replication_key'])
+        self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
+
+        
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a higher replication-key value was NOT replicated (it was deleted prior to this sync) + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #---------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
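+ # NB | tap-postgres presumably filters incremental syncs with an inclusive comparison, roughly: + # SELECT ... WHERE "OUR TS TZ" >= <bookmarked replication_key_value> ORDER BY "OUR TS TZ" ASC + # (a sketch, not the tap's literal SQL). Because the bookmarked value is included, the record whose + # replication-key value equals the saved bookmark (expected_records[5]) is re-emitted on every sync, + # which is why this third sync still returns exactly one upsert even though nothing new was inserted + # and the pk 1 record was deleted.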
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
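+ # NB | the inserted dicts double as the expected upsert records in the assertDictEqual checks further down, + # which is why 'id' and '_sdc_deleted_at' are patched onto them after each insert_record call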
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
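+        # The UPDATE below deliberately writes 'NaN' / '+Infinity' into the numeric columns and an
+        # over-precise money value; the assertions that follow expect the tap to surface those
+        # unrepresentable numerics as None and to round our_money to '$56.81'.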
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
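+        # This variant runs with 'wal2json_message_format' set to '2' (see get_properties). The
+        # delete below is expected to arrive as a single 'upsert' message carrying a non-null
+        # _sdc_deleted_at, without re-emitting the earlier insert the way the format-1 test above does.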
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
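+        # Only the update itself should appear in this sync; its NaN / Infinity numerics are
+        # expected to map to None and our_money to round to '$56.81' (see expected_updated_rec below).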
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
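+# NB | This full-table view scenario only bookmarks the table version; the incremental
+#      view scenario below additionally asserts replication_key and replication_key_value
+#      on the same bookmark.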
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
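+                 # updated_at is the one addition relative to the full-table fixture above;
+                 # it backs the 'updated_at' replication key selected later in this test.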
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
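# Assumed behavior, inferred from the helper names: ensure_environment_variables_set
+         # should fail fast when the TAP_POSTGRES_* variables read in get_properties() and
+         # get_credentials() are missing, and ensure_db should create the database if absent.
+         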
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
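+                 # Postgres itself accepts '24:00:00' as the maximum TIME value; whether the tap
+                 # round-trips it or normalizes it to '00:00:00' is the open question noted above,
+                 # so this record sticks to 23:59:59.999999.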
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
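+        #      With subTest, a mismatch in one field is reported and the loop keeps going,
+        #      so a single sync run surfaces every misbehaving datatype instead of stopping
+        #      at the first failed assertEqual.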
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without errors and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the tap_stream_id is in the expected format for each stream. + - Verify the catalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available.
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the tap_stream_id is in the expected format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the catalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
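+ # For context, a sketch of the metadata entries consumed above (shape assumed from the
+ # filtering logic in this test, not captured output): each field-level entry looks roughly like
+ #   {'breadcrumb': ['properties', 'id'],
+ #    'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer'}}
+ # so automatic and unsupported fields are collected by filtering on metadata['inclusion'],
+ # and the sql-datatype values feed the schema-type assertion below.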
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+ # an existing record
+ canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+ record_pk = 1
+ our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+ our_ts_tz = nyc_tz.localize(our_ts)
+ updated_data = {
+ "OUR TS TZ": our_ts_tz,
+ "our_double": decimal.Decimal("6.6"),
+ "our_money": "$0.00"
+ }
+ self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+ self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+ self.expected_records[0]["our_money"] = "$0.00"
+
+ db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+ # a newly inserted record
+ canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+ record_pk = 5
+ our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+ our_ts_tz = nyc_tz.localize(our_ts)
+ updated_data = {
+ "OUR TS TZ": our_ts_tz,
+ "our_double": decimal.Decimal("6.6"),
+ "our_money": "$0.00"
+ }
+ self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+ self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+ self.expected_records[4]["our_money"] = "$0.00"
+
+ db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+ # deleting
+ # an existing record
+ record_pk = 2
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ # a newly inserted record
+ record_pk = 6
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ #----------------------------------------------------------------------
+ # invoke the sync job AGAIN after various manipulations
+ #----------------------------------------------------------------------
+
+ # run sync job 3 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_pks()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version_3 = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(4, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(5, len(messages))
+ self.assertEqual('upsert', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+ self.assertEqual('activate_version', messages[4]['action'])
+
+ # verify the new table version increased on the third sync
+ self.assertGreater(table_version_3, table_version_2)
+
+ # verify the persisted schema still matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+ # NB | This is a little tough to track mentally so here's a breakdown of
+ # the order of operations by expected records indexes:
+
+ # Prior to Sync 1
+ # insert 0, 1, 2
+
+ # Prior to Sync 2
+ # No db changes
+
+ # Prior to Sync 3
+ # insert 3, 4, 5
+ # update 0, 4
+ # delete 1, 5
+
+ # Resulting Synced Records: 2, 3, 0, 4
+
+
+ # verify replicated records still match expectations
+ self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+ self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+ 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+
+ # verify discovery produced (at least) 1 expected catalog
+ found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+ if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+ self.assertGreaterEqual(len(found_catalogs), 1)
+
+ # verify the tap discovered the expected streams
+ found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+ self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+ # verify that persisted streams have the correct properties
+ test_catalog = found_catalogs[0]
+ self.assertEqual(test_table_name, test_catalog['stream_name'])
+ print("discovered streams are correct")
+
+ # perform table selection
+ print('selecting {} and all fields within the table'.format(test_table_name))
+ schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+ additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+ _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+ # clear state
+ menagerie.set_state(conn_id, {})
+
+ # run sync job 1 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(3, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(4, len(messages))
+ self.assertEqual('activate_version', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+
+ # verify the persisted schema matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+ # verify replicated records match expectations
+ self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+ self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+ self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+ # verify records are in ascending order by replication-key value
+ expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+ self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+ self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+ print("records are correct")
+
+ # grab bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+ # verify state and bookmarks meet expectations
+ self.assertIsNone(state['currently_syncing'])
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertEqual(table_version, bookmark['version'])
+ self.assertEqual(expected_replication_key, bookmark['replication_key'])
+ self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
+
+ 
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
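+ # NB | The replication key is 'OUR TS TZ' and, as the assertions below show, selection
+ # includes the bookmarked value itself. So this sync should emit exactly three upserts:
+ # the previously bookmarked record, the updated record whose replication-key value moved
+ # above the bookmark, and the newly inserted record with a higher replication-key value,
+ # in ascending replication-key order.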
+
+ # verify the first record was the bookmarked record from the previous sync
+ self.assertDictEqual(self.expected_records[2], messages[1]['data'])
+
+ # verify the expected updated record with a higher replication-key value was replicated
+ self.assertDictEqual(self.expected_records[0], messages[2]['data'])
+
+ # verify the expected inserted record with a lower replication-key value was NOT replicated
+ actual_record_ids = [message['data']['id'] for message in messages[1:]]
+ expected_record_id = self.expected_records[3]['id']
+ self.assertNotIn(expected_record_id, actual_record_ids)
+
+ # verify the deleted record with a higher replication-key value was NOT replicated
+ expected_record_id = self.expected_records[4]['id']
+ self.assertNotIn(expected_record_id, actual_record_ids)
+
+ # verify the expected updated record with a lower replication-key value was NOT replicated
+ expected_record_id = self.expected_records[1]['id']
+ self.assertNotIn(expected_record_id, actual_record_ids)
+
+ # verify the expected inserted record with a higher replication-key value was replicated
+ self.assertDictEqual(self.expected_records[5], messages[3]['data'])
+
+ # verify records are in ascending order by replication-key value
+ self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+ self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+ print("records are correct")
+
+ # get bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+ # verify the bookmarked state matches our expectations
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertEqual(bookmark['version'], table_version)
+ self.assertEqual(bookmark['replication_key'], expected_replication_key)
+ self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key])
+
+ #----------------------------------------------------------------------
+ # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
+ #----------------------------------------------------------------------
+
+ # Delete a pre-existing record from the database
+ with db_utils.get_test_connection('dev') as conn:
+ conn.autocommit = True
+ with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+ # delete a record with a lower replication key than the previous sync
+ record_pk = 1
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ # run sync job 3 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(1, record_count_by_stream[test_table_name])
+
+ # verify messages match our expectations
+ self.assertEqual(2, len(messages))
+ self.assertEqual(messages[0]['action'], 'activate_version')
+ self.assertEqual(messages[1]['action'], 'upsert')
+ self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version)
+
+ # verify replicated records meet our expectations... 
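+ # NB | Incremental replication emits no delete events; the deleted row simply stops
+ # being selected. The only remaining row at or above the saved replication-key value
+ # is the previously bookmarked record, so this sync should emit just that one upsert.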
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+
+        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
+        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_cows_1 = bookmark_cows['lsn']
+        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
+
+        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
+        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_chickens_1 = bookmark_chickens['lsn']
+        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job again after adding records
+        #----------------------------------------------------------------------
+        print("inserting 2 more cows and 2 more chickens")
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor() as cur:
+                # insert another cow
+                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
+                insert_record(cur, test_table_name_cows, self.cows_rec_2)
+                # update that cow's expected values
+                self.cows_rec_2['id'] = 2
+                self.cows_rec_2['_sdc_deleted_at'] = None
+
+                # insert another chicken
+                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
+                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
+                # update that chicken's expected values
+                self.chicken_rec_2['id'] = 2
+                self.chicken_rec_2['_sdc_deleted_at'] = None
+
+                # and repeat...
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
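# Editor's note: illustrative sketch, not part of the patch. It shows how the expected
# 'OUR TS' / 'OUR TS TZ' strings in expected_inserted_record above follow from the rec_3
# fixture values (the naive timestamp is formatted as-is with a UTC suffix, the tz-aware
# one is converted to UTC first). Assumes only datetime and pytz.
import datetime
import pytz

our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333)
our_ts_tz = pytz.timezone('America/New_York').localize(our_ts)   # 03:03 EST == 08:03 UTC (March 3 is outside DST)

fmt = "%Y-%m-%dT%H:%M:%S.%f+00:00"
print(our_ts.strftime(fmt))                              # 1993-03-03T03:03:03.333333+00:00
print(our_ts_tz.astimezone(pytz.utc).strftime(fmt))      # 1993-03-03T08:03:03.333333+00:00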
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
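# Editor's note: illustrative sketch, not part of the patch. The UPDATE above deliberately
# writes 'NaN' / '+Infinity' numerics and an over-precise money value. Singer records are
# JSON, and JSON has no encoding for non-finite numbers, which is why the expected record
# in the next section carries None for our_decimal, our_double and our_real; Postgres'
# money type keeps two fractional digits (default lc_monetary), so '$56.811' reads back
# as '$56.81'.
import json

try:
    json.dumps(float('nan'), allow_nan=False)   # strict JSON encoding rejects NaN/Infinity
except ValueError as err:
    print(err)                                  # Out of range float values are not JSON compliant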
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
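# Editor's note: illustrative sketch, not part of the patch. It spells out roughly the SQL
# that the insert_record() helper above builds for self.rec_1 (identifiers are interpolated
# into the statement, values travel separately as bind parameters). A plain double-quote
# wrapper stands in here for psycopg2's quote_ident, which needs a live connection.
data = {'name': 'fred', 'size': 'big'}
our_keys = sorted(data.keys())
our_values = [data[key] for key in our_keys]

insert_sql = "INSERT INTO {} ( {} ) VALUES ( {} )".format(
    '"postgres_views_full_table_replication_test"',   # quote_ident(table_name, cursor)
    ", ".join(our_keys),
    ",".join(["%s"] * len(our_keys)))

print(insert_sql)    # INSERT INTO "postgres_views_full_table_replication_test" ( name, size ) VALUES ( %s,%s )
print(our_values)    # ['fred', 'big']  -- passed via cursor.execute(insert_sql, our_values)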
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
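# Editor's note (not part of the patch): the our_decimal bounds in expected_schemas above
# follow from the NUMERIC(NUMERIC_PRECISION, NUMERIC_SCALE) = NUMERIC(12,2) column created
# further down in setUp:
#   multipleOf = decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE)))   -> Decimal('0.01')
#   maximum    = 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE)         -> 10000000000
#   minimum    = -maximum, exclusive on both ends
# i.e. a NUMERIC(12,2) value has at most ten integer digits and two fractional digits.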
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point
+                   when precision is explicitly stated, maximum is 1000 digits
+        TODOs
+         - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision).
+         - Cover Maximum precision and scale
+         - Cover Minimum precision and scale
+         - Cover NaN
+
+
+    Floating-Point Types
+      - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic
+      - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits
+      - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits
+      - numbers too close to zero that are not representable as distinct from zero will cause an underflow error.
+        TODOs
+         - Cover NaN, -Inf, Inf
+         -
+
+
+    Character
+      -
+        TODOS
+         - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n)
+         - VARCHAR(10485760)
+         - Generate a 1 GB string??
+
+    Binary Types
+      Bytea | binary string, sequence of octets can be written in hex or escape
+        TODOs
+         - Generate different fields for hex and escape
+
+
+    Network Address Types
+        TODOs
+         - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y'
+         - For inet/cidr 'y' will default to 32 for ipv4 and 128 for ipv6
+         - For mac do all the input formats
+           [] '08:00:2b:01:02:03'
+           [] '08-00-2b-01-02-03'
+           [] '08002b:010203'
+           [] '08002b-010203'
+           [] '0800.2b01.0203'
+           [] '08002b010203'
+
+
+    Datetimes
+        TODOs
+         - Test values with second, millisecond and microsecond precision
+
+    Boolean
+        TODOs
+         - Enter all acceptable inputs for True:
+             TRUE
+             't'
+             'true'
+             'y'
+             'yes'
+             '1'
+         - Enter all acceptable inputs for False:
+             FALSE
+             'f'
+             'false'
+             'n'
+             'no'
+             '0'
+    """
+
+    AUTOMATIC_FIELDS = "automatic"
+    REPLICATION_KEYS = "valid-replication-keys"
+    PRIMARY_KEYS = "table-key-properties"
+    FOREIGN_KEYS = "table-foreign-key-properties"
+    REPLICATION_METHOD = "forced-replication-method"
+    API_LIMIT = "max-row-limit"
+    INCREMENTAL = "INCREMENTAL"
+    FULL_TABLE = "FULL_TABLE"
+    LOG_BASED = "LOG_BASED"
+
+    UNSUPPORTED_TYPES = {
+        "BIGSERIAL",
+        "BIT VARYING",
+        "BOX",
+        "BYTEA",
+        "CIRCLE",
+        "INTERVAL",
+        "LINE",
+        "LSEG",
+        "PATH",
+        "PG_LSN",
+        "POINT",
+        "POLYGON",
+        "SERIAL",
+        "SMALLSERIAL",
+        "TSQUERY",
+        "TSVECTOR",
+        "TXID_SNAPSHOT",
+        "XML",
+    }
+    default_replication_method = ""
+
+    def tearDown(self):
+        pass
+        # with db_utils.get_test_connection(test_db) as conn:
+        #     conn.autocommit = True
+        #     with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        #         cur.execute(""" SELECT pg_drop_replication_slot('stitch') """)
+
+    def setUp(self):
+        db_utils.ensure_environment_variables_set()
+
+        db_utils.ensure_db(test_db)
+        self.maxDiff = None
+
+        with db_utils.get_test_connection(test_db) as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # db_utils.ensure_replication_slot(cur, test_db)
+
+                canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+
+                create_table_sql = """
+CREATE TABLE {} (id SERIAL PRIMARY KEY,
+                our_varchar VARCHAR,
+                our_varchar_big VARCHAR(10485760),
+                our_char CHAR,
+                our_char_big CHAR(10485760),
+                our_text TEXT,
+                our_text_2 TEXT,
+                our_integer INTEGER,
+                our_smallint SMALLINT,
+                our_bigint BIGINT,
+                our_decimal NUMERIC(12,2),
+                "OUR TS" TIMESTAMP WITHOUT TIME ZONE,
+                "OUR TS TZ" TIMESTAMP WITH TIME ZONE,
+                "OUR TIME" TIME WITHOUT TIME ZONE,
+                "OUR TIME TZ" TIME WITH TIME ZONE,
+                "OUR DATE" DATE,
+                our_double DOUBLE PRECISION,
+
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
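+                # NB | Postgres accepts '24:00:00' as a TIME input, but per the TODO above it
+                #      appears to round-trip as '00:00:00', so '23:59:59.999999' is used as the
+                #      practical maximum here and for "OUR TIME TZ" below.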
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+            'our_cidr': 'cidr',
+            'our_inet': 'inet',
+            'our_mac': 'macaddr',
+            'our_alignment_enum': 'alignment',
+            'our_money': 'money',
+            'invalid_bigserial': 'bigint',
+            'invalid_bit_varying': 'bit varying',
+            'invalid_box': 'box',
+            'invalid_bytea': 'bytea',
+            'invalid_circle': 'circle',
+            'invalid_interval': 'interval',
+            'invalid_line': 'line',
+            'invalid_lseg': 'lseg',
+            'invalid_path': 'path',
+            'invalid_pg_lsn': 'pg_lsn',
+            'invalid_point': 'point',
+            'invalid_polygon': 'polygon',
+            'invalid_serial': 'integer',
+            'invalid_smallserial': 'smallint',
+            'invalid_tsquery': 'tsquery',
+            'invalid_tsvector': 'tsvector',
+            'invalid_txid_snapshot': 'txid_snapshot',
+            'invalid_xml': 'xml',
+        }
+
+    @staticmethod
+    def tap_name():
+        return "tap-postgres"
+
+    @staticmethod
+    def name():
+        return "tap_tester_postgres_discovery"
+
+    @staticmethod
+    def get_type():
+        return "platform.postgres"
+
+    @staticmethod
+    def get_credentials():
+        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
+
+    def get_properties(self, original_properties=True):
+        return_value = {
+            'host' : os.getenv('TAP_POSTGRES_HOST'),
+            'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
+            'port' : os.getenv('TAP_POSTGRES_PORT'),
+            'user' : os.getenv('TAP_POSTGRES_USER'),
+            'default_replication_method' : self.FULL_TABLE,
+            'filter_dbs' : 'discovery1'
+        }
+        if not original_properties:
+            if self.default_replication_method is self.LOG_BASED:
+                return_value['wal2json_message_format'] = '1'
+
+            return_value['default_replication_method'] = self.default_replication_method
+
+        return return_value
+
+    def test_run(self):
+        """Parametrized discovery test running against each replication method."""
+
+        self.default_replication_method = self.FULL_TABLE
+        full_table_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(full_table_conn_id)
+
+        self.default_replication_method = self.INCREMENTAL
+        incremental_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(incremental_conn_id)
+
+        # NB | We are able to generate a connection and run discovery with a default replication
+        #      method of logical replication WITHOUT selecting a replication slot. This is not
+        #      ideal behavior. This BUG should not be carried over into hp-postgres, but will not
+        #      be fixed for this tap.
+        self.default_replication_method = self.LOG_BASED
+        log_based_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(log_based_conn_id)
+
+    def discovery_test(self, conn_id):
+        """
+        Basic Discovery Test for a database tap.
+
+        Test Description:
+          Ensure discovery runs without errors and generates a catalog of the expected form
+
+        Test Cases:
+        - Verify discovery generated the expected catalogs by name.
+        - Verify that the tap_stream_id is in the <database>-<schema>-<table> format for each stream.
+        - Verify the catalog is found for a given stream.
+        - Verify there is only 1 top level breadcrumb in metadata for a given stream.
+        - Verify replication key(s) match expectations for a given stream.
+        - Verify primary key(s) match expectations for a given stream.
+        - Verify the replication method matches our expectations for a given stream.
+        - Verify that only primary keys are given the inclusion of automatic in metadata
+          for a given stream.
+        - Verify expected unsupported fields are given the inclusion of unsupported in
+          metadata for a given stream.
+        - Verify that all fields for a given stream which are not unsupported or automatic
+          have inclusion of available.
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
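+                # NB | expected_replication_keys is an empty set for this discovery test (see
+                #      "collecting expected values" above), so only the primary keys should be
+                #      marked automatic in the assertion below.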
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_1 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_1, bookmark['version'])
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN and get the same 3 records
+        #----------------------------------------------------------------------
+
+        # run sync job 2 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_2 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('activate_version', messages[3]['action'])
+
+        # verify the new table version increased on the second sync
+        self.assertGreater(table_version_2, table_version_1)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_2, bookmark['version'])
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN following various manipulations to the data
+        #----------------------------------------------------------------------
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # NB | We will perform the following actions prior to the next sync:
+                #     [Action (EXPECTED RESULT)]
+
+                #     Insert a record
+                #     Insert a record to be updated prior to sync
+                #     Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+                #     Update an existing record
+                #     Update a newly inserted record
+
+                #     Delete an existing record
+                #     Delete a newly inserted record
+
+                # inserting...
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+                # an existing record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 1
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[0]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+                # a newly inserted record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 5
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[4]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+                # deleting
+                # an existing record
+                record_pk = 2
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+                # a newly inserted record
+                record_pk = 6
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN after various manipulations
+        #----------------------------------------------------------------------
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_3 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(4, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the new table version increased on the third sync
+        self.assertGreater(table_version_3, table_version_2)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+        # NB | This is a little tough to track mentally so here's a breakdown of
+        #      the order of operations by expected records indexes:
+
+        #      Prior to Sync 1
+        #      insert 0, 1, 2
+
+        #      Prior to Sync 2
+        #      No db changes
+
+        #      Prior to Sync 3
+        #      insert 3, 4, 5
+        #      update 0, 4
+        #      delete 1, 5
+
+        #      Resulting Synced Records: 2, 3, 0, 4
+
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+        self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(table_version, bookmark['version'])
+        self.assertEqual(expected_replication_key, bookmark['replication_key'])
+        self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
+
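+        # A worked example (editorial sketch, not part of the tap's code) of the UTC
+        # normalization that expected_ts_tz() performs for the replication-key expectations,
+        # using the record 3 fixture timestamp defined above:
+        #
+        #     nyc_tz = pytz.timezone('America/New_York')
+        #     local = nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))  # EST, UTC-5
+        #     local.astimezone(pytz.utc).strftime("%Y-%m-%dT%H:%M:%S.%f+00:00")
+        #     # -> '1997-02-02T07:02:02.722184+00:00'
+        #
+        # The bookmarked replication_key_value checked above is this UTC-formatted string, so
+        # the ordering assertions compare consistent UTC values rather than offset-dependent
+        # local times.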
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #--------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
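Because the bookmark comparison is inclusive, the row whose replication-key value was saved as the bookmark is picked up again on every subsequent run, which is why a sync with no new higher-key changes still yields exactly one upsert. A rough sketch of the state such a stream carries between runs, assuming the replication key is the "OUR TS TZ" column as the updates above suggest; the literal values are placeholders and the key names mirror the assertions below:

state = {
    'currently_syncing': None,
    'bookmarks': {
        'dev-public-postgres_incremental_replication_test': {
            # an INCREMENTAL stream is not expected to carry an 'lsn'
            'version': 1630000000000,  # placeholder table version
            'replication_key': 'OUR TS TZ',
            'replication_key_value': '2111-01-01T17:12:12.222111+00:00',
        }
    }
}

bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
assert bookmark.get('lsn') is None
assert bookmark['replication_key_value'] >= '1997-02-02T07:02:02.722184+00:00'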
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
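# Note on the fixture values in this dict: PostgreSQL array literals are written
# in their text form, so '{a,b,c}' is one-dimensional and '{{1,2,3},{4,5,6}}' is
# two-dimensional. Literal strings such as '{{bad}}' therefore decode to a
# one-element inner array (a list of lists in the expected record), while values
# built with str.format, e.g. '{{{}}}'.format(our_date), collapse to single
# braces because doubled braces are only format escapes there.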
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
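# Dropping any leftover slot before creating a fresh one keeps the run
# deterministic: a logical replication slot retains WAL from its confirmed
# position onward, so a stale slot from an earlier run could replay old changes
# into this sync. Each database gets its own slot, 'stitch_dev' above and
# 'stitch_postgres' here, both using the wal2json output plugin.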
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
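# The source dicts double as the expected record data: after each insert the
# database-assigned serial 'id' and the tap-added '_sdc_deleted_at' column
# (None until a delete event is replicated) are patched in, so every dict can
# be compared directly against the upsert message emitted for that row.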
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
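The delete verifications above repeat the same three checks each time: the message action is 'upsert', the payload carries a populated _sdc_deleted_at, and the id matches the deleted row. A minimal sketch of a reusable assertion helper is shown below; the name assert_soft_delete is hypothetical and is not part of this patch.

def assert_soft_delete(self, message, expected_id):
    # Logical-replication deletes surface as upserts whose data payload
    # carries a non-null _sdc_deleted_at timestamp.
    self.assertEqual(message['action'], 'upsert')
    self.assertIsNotNone(message['data'].get('_sdc_deleted_at'))
    self.assertEqual(message['data']['id'], expected_id)

# usage, mirroring the checks above:
#   self.assert_soft_delete(records_by_stream['postgres_logical_replication_test']['messages'][0], expected_id=2)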
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
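The fixture records write hstore values as literal strings (for example 'jumps=>"high",name=>"betty"'), while the expected records compare them against Python dicts. For reading those columns back as dicts directly through psycopg2, outside the tap, register_hstore performs that adaptation; the connection parameters below are placeholders built from the same environment variables the tests use, and the snippet is illustrative only.

import os
import psycopg2
import psycopg2.extras

conn = psycopg2.connect(host=os.getenv('TAP_POSTGRES_HOST'),
                        port=os.getenv('TAP_POSTGRES_PORT'),
                        dbname=os.getenv('TAP_POSTGRES_DBNAME'),
                        user=os.getenv('TAP_POSTGRES_USER'),
                        password=os.getenv('TAP_POSTGRES_PASSWORD'))
psycopg2.extras.register_hstore(conn)   # hstore values now come back as Python dicts
with conn.cursor() as cur:
    cur.execute("""SELECT 'jumps=>"high",name=>"betty"'::hstore""")
    print(cur.fetchone()[0])            # {'jumps': 'high', 'name': 'betty'}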
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
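The UPDATE above intentionally writes the special values 'NaN' and '+Infinity' into the numeric columns, and '$56.811' into the money column. JSON has no encoding for non-finite numbers, which is why the expected record below carries None for our_decimal, our_real and our_double; Postgres's money type rounds to two fractional digits, which is why '$56.81' is expected. A minimal illustration of the JSON constraint follows (not the tap's actual conversion code).

import json
import math

def json_safe_number(value):
    # JSON cannot encode NaN or +/-Infinity, so non-finite values become null
    if value is None or not math.isfinite(value):
        return None
    return value

print(json.dumps({'our_real': json_safe_number(float('+inf')),
                  'our_double': json_safe_number(float('nan'))}))
# -> {"our_real": null, "our_double": null}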
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
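Both view tests, like the logical-replication tests above them, build SQL through quote_ident and canonicalized_table_name rather than plain string interpolation, because several identifiers contain spaces or mixed case ("OUR TS", "OUR TIME TZ") and quoting keeps them valid. A brief illustration, assuming a live connection built from the same environment variables; the snippet is not part of the patch.

import os
import psycopg2
from psycopg2.extensions import quote_ident

conn = psycopg2.connect(host=os.getenv('TAP_POSTGRES_HOST'),
                        port=os.getenv('TAP_POSTGRES_PORT'),
                        dbname=os.getenv('TAP_POSTGRES_DBNAME'),
                        user=os.getenv('TAP_POSTGRES_USER'),
                        password=os.getenv('TAP_POSTGRES_PASSWORD'))
with conn.cursor() as cur:
    print(quote_ident('OUR TS', cur))        # "OUR TS"       (space and case preserved)
    print(quote_ident('chicken_view', cur))  # "chicken_view"
    # canonicalized_table_name(schema, table, cur) joins two quoted parts:
    #   "public"."postgres_views_full_table_replication_test"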
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
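+                # the second table carries the foreign key and age column; the view created below joins it to table 1 on id = fk_id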
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
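+                                      # NB: Postgres accepts '24:00:00' as a TIME input, but it appears to be
+                                      #     replicated back as '00:00:00' (see the TODO above), so 23:59:59.999999
+                                      #     is used as the practical maximum here.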
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
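+        # messages[0] is the opening activate_version message, so messages[1] holds the upsert for
+        # record 1 (minimum values) and messages[2] the upsert for record 2 (maximum values).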
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+            'our_cidr': 'cidr',
+            'our_inet': 'inet',
+            'our_mac': 'macaddr',
+            'our_alignment_enum': 'alignment',
+            'our_money': 'money',
+            'invalid_bigserial': 'bigint',
+            'invalid_bit_varying': 'bit varying',
+            'invalid_box': 'box',
+            'invalid_bytea': 'bytea',
+            'invalid_circle': 'circle',
+            'invalid_interval': 'interval',
+            'invalid_line': 'line',
+            'invalid_lseg': 'lseg',
+            'invalid_path': 'path',
+            'invalid_pg_lsn': 'pg_lsn',
+            'invalid_point': 'point',
+            'invalid_polygon': 'polygon',
+            'invalid_serial': 'integer',
+            'invalid_smallserial': 'smallint',
+            'invalid_tsquery': 'tsquery',
+            'invalid_tsvector': 'tsvector',
+            'invalid_txid_snapshot': 'txid_snapshot',
+            'invalid_xml': 'xml',
+        }
+
+    @staticmethod
+    def tap_name():
+        return "tap-postgres"
+
+    @staticmethod
+    def name():
+        return "tap_tester_postgres_discovery"
+
+    @staticmethod
+    def get_type():
+        return "platform.postgres"
+
+    @staticmethod
+    def get_credentials():
+        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
+
+    def get_properties(self, original_properties=True):
+        return_value = {
+            'host' : os.getenv('TAP_POSTGRES_HOST'),
+            'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
+            'port' : os.getenv('TAP_POSTGRES_PORT'),
+            'user' : os.getenv('TAP_POSTGRES_USER'),
+            'default_replication_method' : self.FULL_TABLE,
+            'filter_dbs' : 'discovery1'
+        }
+        if not original_properties:
+            if self.default_replication_method is self.LOG_BASED:
+                return_value['wal2json_message_format'] = '1'
+
+            return_value['default_replication_method'] = self.default_replication_method
+
+        return return_value
+
+    def test_run(self):
+        """Parametrized discovery test running against each replication method."""
+
+        self.default_replication_method = self.FULL_TABLE
+        full_table_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(full_table_conn_id)
+
+        self.default_replication_method = self.INCREMENTAL
+        incremental_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(incremental_conn_id)
+
+        # NB | We are able to generate a connection and run discovery with a default replication
+        #      method of logical replication WITHOUT selecting a replication slot. This is not
+        #      ideal behavior. This BUG should not be carried over into hp-postgres, but will not
+        #      be fixed for this tap.
+        self.default_replication_method = self.LOG_BASED
+        log_based_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(log_based_conn_id)
+
+    def discovery_test(self, conn_id):
+        """
+        Basic Discovery Test for a database tap.
+
+        Test Description:
+          Ensure discovery runs without errors and generates a catalog of the expected form.
+
+        Test Cases:
+        - Verify discovery generated the expected catalogs by name.
+        - Verify that the tap_stream_id for each stream follows the expected
+          {database}-{schema}-{table} format.
+        - Verify the catalog is found for a given stream.
+        - Verify there is only 1 top level breadcrumb in metadata for a given stream.
+        - Verify replication key(s) match expectations for a given stream.
+        - Verify primary key(s) match expectations for a given stream.
+        - Verify the replication method matches our expectations for a given stream.
+        - Verify that only primary keys are given the inclusion of automatic in metadata
+          for a given stream.
+        - Verify expected unsupported fields are given the inclusion of unsupported in
+          metadata for a given stream.
+        - Verify that all fields for a given stream which are not unsupported or automatic
+          have inclusion of available.
+        - Verify row-count metadata matches expectations for a given stream.
+        - Verify selected metadata is None for all streams.
+        - Verify is-view metadata is False for a given stream.
+        - Verify no forced-replication-method is present in metadata for a given stream.
+        - Verify schema and db match expectations for a given stream.
+        - Verify schema types match expectations for a given stream.
+        """
+        # TODO Generate multiple tables (streams) and maybe dbs too?
+
+        # run discovery (check mode)
+        check_job_name = runner.run_check_mode(self, conn_id)
+
+        # Verify check exit codes
+        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
+        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # Verify discovery generated a catalog
+        found_catalogs = menagerie.get_catalogs(conn_id)
+        self.assertGreater(len(found_catalogs), 0)
+
+        # Verify discovery generated the expected catalogs by name
+        found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # Verify the tap_stream_id follows the expected {database}-{schema}-{table} format for each stream
+        found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids)
+
+        # Test by stream
+        for stream in self.expected_check_streams():
+            with self.subTest(stream=stream):
+
+                # Verify the catalog is found for a given stream
+                catalog = next(iter([catalog for catalog in found_catalogs
+                                     if catalog["stream_name"] == stream]))
+                self.assertTrue(isinstance(catalog, dict))
+
+                # collecting expected values
+                expected_primary_keys = self.expected_primary_keys()[stream]
+                expected_replication_keys = set()
+                expected_unsupported_fields = self.expected_unsupported_fields()
+                expected_fields_to_datatypes = self.expected_schema_types()
+                expected_row_count = len(self.recs)
+
+                # collecting actual values...
+                schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id'])
+                stream_metadata = schema_and_metadata["metadata"]
+                top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []]
+                stream_properties = top_level_metadata[0]['metadata']
+                actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, []))
+                actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, []))
+                actual_replication_method = stream_properties.get(self.REPLICATION_METHOD)
+                actual_automatic_fields = set(
+                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
+                    if item.get("metadata").get("inclusion") == "automatic"
+                )
+                actual_unsupported_fields = set(
+                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
+                    if item.get("metadata").get("inclusion") == "unsupported"
+                )
+                actual_fields_to_datatypes = {
+                    item['breadcrumb'][1]: item['metadata'].get('sql-datatype')
+                    for item in stream_metadata[1:]
+                }
+
+                # Verify there is only 1 top level breadcrumb in metadata
+                self.assertEqual(1, len(top_level_metadata))
+
+                # Verify replication key(s) match expectations
+                self.assertSetEqual(
+                    expected_replication_keys, actual_replication_keys
+                )
+
+                # NB | We expect primary keys and replication keys to have inclusion automatic for
+                #      key-based incremental replication. But that is only true for primary keys here.
+                #      This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
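+
+                # For orientation, a per-field metadata entry collected above is assumed to
+                # look roughly like the following (a sketch inferred from the assertions in
+                # this test, not captured tap output):
+                #   {'breadcrumb': ['properties', 'id'],
+                #    'metadata': {'sql-datatype': 'integer', 'inclusion': 'automatic'}}
+                #   {'breadcrumb': ['properties', 'invalid_xml'],
+                #    'metadata': {'sql-datatype': 'xml', 'inclusion': 'unsupported'}}
+                #   {'breadcrumb': ['properties', 'our_varchar'],
+                #    'metadata': {'sql-datatype': 'character varying', 'inclusion': 'available'}}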
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_1 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_1, bookmark['version'])
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN and get the same 3 records
+        #----------------------------------------------------------------------
+
+        # run sync job 2 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_2 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('activate_version', messages[3]['action'])
+
+        # verify the new table version increased on the second sync
+        self.assertGreater(table_version_2, table_version_1)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_2, bookmark['version'])
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN following various manipulations to the data
+        #----------------------------------------------------------------------
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # NB | We will perform the following actions prior to the next sync:
+                #      [Action (EXPECTED RESULT)]
+
+                #      Insert a record
+                #      Insert a record to be updated prior to sync
+                #      Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+                #      Update an existing record
+                #      Update a newly inserted record
+
+                #      Delete an existing record
+                #      Delete a newly inserted record
+
+                # inserting...
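+                # NB | db_utils.insert_record is assumed to build its column list straight
+                #      from the dict keys (the same pattern as the local insert_record helper
+                #      in the arrays test above), which is why mixed-case column names are
+                #      wrapped in quote_ident below. Roughly:
+                #        INSERT INTO "postgres_full_table_replication_test"
+                #            (our_varchar, "OUR TS", "OUR TS TZ", ...)
+                #        VALUES (%s, %s, %s, ...)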
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
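+                # NB | db_utils.update_record(cur, table, pk, data) is assumed to issue an
+                #      UPDATE keyed on the primary key, so only the columns present in
+                #      updated_data change for the targeted row. Roughly:
+                #        UPDATE "public"."postgres_full_table_replication_test"
+                #           SET "OUR TS TZ" = %s, our_double = %s, our_money = %s
+                #         WHERE id = %s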
+                # an existing record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 1
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[0]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+                # a newly inserted record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 5
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[4]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+                # deleting
+                # an existing record
+                record_pk = 2
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+                # a newly inserted record
+                record_pk = 6
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN after various manipulations
+        #----------------------------------------------------------------------
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_3 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(4, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the new table version increased on the third sync
+        self.assertGreater(table_version_3, table_version_2)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+        # NB | This is a little tough to track mentally so here's a breakdown of
+        #      the order of operations by expected records indexes:
+
+        #      Prior to Sync 1
+        #        insert 0, 1, 2
+
+        #      Prior to Sync 2
+        #        No db changes
+
+        #      Prior to Sync 3
+        #        insert 3, 4, 5
+        #        update 0, 4
+        #        delete 1, 5
+
+        #      Resulting Synced Records: 2, 3, 0, 4
+
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+        self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+        
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(table_version, bookmark['version'])
+        self.assertEqual(expected_replication_key, bookmark['replication_key'])
+        self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
+
+
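The three syncs in this test all hinge on how INCREMENTAL replication applies the bookmark: rows are selected with an inclusive comparison against the saved replication-key value and emitted in ascending key order, so the previously bookmarked row is re-replicated on every run, while rows whose key sits below the bookmark (and hard deletes) are never seen. A minimal sketch of that selection logic, assuming a standalone helper (the function name and query shape are illustrative, not tap-postgres internals):

    from psycopg2.extensions import quote_ident

    def select_incremental_rows(cur, schema, table, replication_key, bookmark_value):
        # Inclusive ">=": the row that produced the bookmark is picked up again;
        # lower-key rows and deleted rows are invisible to this replication method.
        sql = 'SELECT * FROM {}.{} WHERE {} >= %s ORDER BY {} ASC'.format(
            quote_ident(schema, cur), quote_ident(table, cur),
            quote_ident(replication_key, cur), quote_ident(replication_key, cur))
        cur.execute(sql, (bookmark_value,))
        return cur.fetchall()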
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
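Before the per-record assertions, a quick accounting of why exactly these three upserts are expected. This is an illustrative sketch, not test code; the naive NYC-local datetimes are compared only because their ordering matches the UTC-normalized values the tap emits:

    import datetime

    bookmark = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)   # replication-key value saved by sync 1 (row id 3)
    rows = {                                                     # id -> "OUR TS TZ" (NYC local) at sync-2 time
        1: datetime.datetime(2021, 4, 4, 4, 4, 4, 733184),       # updated above the bookmark  -> replicated
        2: datetime.datetime(1990, 4, 4, 4, 4, 4, 733184),       # updated below the bookmark  -> skipped
        3: datetime.datetime(1997, 2, 2, 2, 2, 2, 722184),       # the bookmarked row itself   -> re-replicated (>=)
        4: datetime.datetime(1996, 4, 4, 4, 4, 4, 733184),       # inserted below the bookmark -> skipped
        6: datetime.datetime(2111, 1, 1, 12, 12, 12, 222111),    # inserted above the bookmark -> replicated
    }                                                            # id 5 was inserted and then deleted, so it never syncs
    replicated_ids = [i for _, i in sorted((ts, i) for i, ts in rows.items() if ts >= bookmark)]
    assert replicated_ids == [3, 1, 6]   # i.e. expected_records[2], [0], [5] in messages[1:4] below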
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #--------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
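The same accounting for this third sync, again as an illustrative sketch rather than test code: the bookmark now holds row id 6's replication-key value (year 2111), and the only change beforehand was deleting row id 1, whose key sits below that bookmark, so the inclusive filter matches nothing but the bookmarked row itself:

    # id -> replication-key year still present in the table (ids 1 and 5 were deleted)
    remaining = {2: 1990, 3: 1997, 4: 1996, 6: 2111}
    assert [i for i, year in remaining.items() if year >= 2111] == [6]   # only the previously bookmarked row syncs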
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
            conn.autocommit = True
+            with conn.cursor() as cur:
+                #insert another chicken
+                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
+                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
+
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+
+        # verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        record_count_by_stream = runner.examine_target_output_file(self,
+                                                                   conn_id,
+                                                                   self.expected_sync_streams(),
+                                                                   self.expected_pks())
+        self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1})
+
+        upserts = []
+        for u in runner.get_upserts_from_target_output():
+            self.assertIsNotNone(u.get('_sdc_lsn'))
+            del u['_sdc_lsn']
+            upserts.append(u)
+
+        self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'},
+                          {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}],
+                         upserts)
+
+        print("inserted record is correct")
+
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+        cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
+        self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn")
+        lsn_cows_2 = cows_bookmark['lsn']
+        self.assertTrue(lsn_cows_2 >= lsn_cows_1)
+
+        chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens']
+        self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn")
+        lsn_chickens_2 = chickens_bookmark['lsn']
+        self.assertTrue(lsn_chickens_2 >= lsn_chickens_1)
+
+        #table_version does NOT change
+        self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version")
+
+        #table_version does NOT change
+        self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version")
+
+
+
+SCENARIOS.add(PostgresLogicalRepMultipleDBs)
+import os
+import unittest
+
+import psycopg2.extras
+from psycopg2.extensions import quote_ident
+from tap_tester.scenario import (SCENARIOS)
+import tap_tester.connections as connections
+import tap_tester.menagerie as menagerie
+import tap_tester.runner as runner
+
+import db_utils # pylint: disable=import-error
+
+
+expected_schemas = {'postgres_logical_replication_test_cows':
+                    {'type': 'object',
+                     'selected': True,
+                     'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'},
+                                    'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True},
+                                    'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}},
+
+                    'postgres_logical_replication_test_chickens':
+                    {'type': 'object',
+                     'selected': True,
+                     'properties': {'chicken_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'},
+                                    'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True},
+                                    'chicken_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}}
+
+
+def insert_record(cursor,
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
                         'activate_version')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+
+        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
+        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_cows_1 = bookmark_cows['lsn']
+        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
+
+        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
+        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_chickens_1 = bookmark_chickens['lsn']
+        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job again after adding records
+        #----------------------------------------------------------------------
+        print("inserting 2 more cows and 2 more chickens")
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor() as cur:
+                # insert another cow
+                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
+                insert_record(cur, test_table_name_cows, self.cows_rec_2)
+                # update that cow's expected values
+                self.cows_rec_2['id'] = 2
+                self.cows_rec_2['_sdc_deleted_at'] = None
+
+                # insert another chicken
+                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
+                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
+                # update that chicken's expected values
+                self.chicken_rec_2['id'] = 2
+                self.chicken_rec_2['_sdc_deleted_at'] = None
+
+                # and repeat...
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
                 'dev-public-postgres_logical_replication_test'}
+
+    @staticmethod
+    def expected_sync_streams():
+        return { 'postgres_logical_replication_test' }
+
+    @staticmethod
+    def expected_pks():
+        return {
+            'postgres_logical_replication_test' : {'id'}
+        }
+
+    @staticmethod
+    def tap_name():
+        return "tap-postgres"
+
+    @staticmethod
+    def name():
+        return "tap_tester_postgres_logical_replication"
+
+    @staticmethod
+    def get_type():
+        return "platform.postgres"
+
+    @staticmethod
+    def get_credentials():
+        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
+
+    @staticmethod
+    def get_properties():
+        return {'host' : os.getenv('TAP_POSTGRES_HOST'),
+                'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
+                'port' : os.getenv('TAP_POSTGRES_PORT'),
+                'user' : os.getenv('TAP_POSTGRES_USER'),
+                'default_replication_method' : 'LOG_BASED',
+                'logical_poll_total_seconds': '10',
+                'wal2json_message_format': '1'
+                }
+
+
+    def test_run(self):
+        conn_id = connections.ensure_connection(self)
+
+        # run in check mode
+        check_job_name = runner.run_check_mode(self, conn_id)
+
+        # verify check exit codes
+        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
+        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # verify the tap discovered the right streams
+        found_catalogs = [fc for fc
+                          in menagerie.get_catalogs(conn_id)
+                          if fc['tap_stream_id'] in self.expected_check_streams()]
+
+
+        self.assertGreaterEqual(len(found_catalogs),
+                                1,
+                                msg="unable to locate schemas for connection {}".format(conn_id))
+
+        found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs))
+        diff = self.expected_check_streams().symmetric_difference(found_catalog_names)
+        self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+
+        self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name'])
+
+        print("discovered streams are correct")
+
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}]
+        #don't select our_text_2
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog,
+                                                               menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']),
+                                                               additional_md,
+                                                               ['our_text_2'])
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+
+        # verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        record_count_by_stream = runner.examine_target_output_file(self,
+                                                                   conn_id,
+                                                                   self.expected_sync_streams(),
+                                                                   self.expected_pks())
+
+
+        self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4})
+        records_by_stream = runner.get_records_from_target_output()
+
+        table_version = records_by_stream['postgres_logical_replication_test']['table_version']
+
+        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'],
+                         'activate_version')
+
+        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'],
+                         'upsert')
+
+        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'],
+                         'upsert')
+
+        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'],
+                         'upsert')
+
+        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'],
+
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys()))))
+
+
+        for k,v in update_message['data'].items():
+            self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k))
+
+        print("updated record is correct")
+
+        #check state again
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+        chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test']
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+        self.assertIsNotNone(chicken_bookmark['lsn'],
+                             msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn")
+        lsn_6 = chicken_bookmark['lsn']
+        self.assertTrue(lsn_6 >= lsn_5)
+
+        #table_version does NOT change
+        self.assertEqual(chicken_bookmark['version'], table_version,
+                         msg="expected bookmark for stream public-postgres_logical_replication_test to match version")
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job one last time. should only get the PREVIOUS update
+        #----------------------------------------------------------------------
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        # verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        record_count_by_stream = runner.examine_target_output_file(self,
+                                                                   conn_id,
+                                                                   self.expected_sync_streams(),
+                                                                   self.expected_pks())
+        #we will get the previous update record again
+        self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1})
+        # TODO the next line is not grabbing the record from the latest sync, opening potential for false negatives
+        update_message = records_by_stream['postgres_logical_replication_test']['messages'][2]
+        self.assertEqual(update_message['action'], 'upsert')
+
+        self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()),
+                         msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys()))))
+
+
+        for k,v in update_message['data'].items():
+            self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k))
+
+
+        #check state again
+        state = menagerie.get_state(conn_id)
+        chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test']
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+        self.assertIsNotNone(chicken_bookmark['lsn'],
+                             msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn")
+        lsn_7 = chicken_bookmark['lsn']
+        self.assertTrue(lsn_7 >= lsn_6)
+
+        #table_version does NOT change
+        self.assertEqual(chicken_bookmark['version'], table_version,
+                         msg="expected bookmark for stream public-postgres_logical_replication_test to match version")
+
+SCENARIOS.add(PostgresLogicalRep)
+import os
+import decimal
+import unittest
+import datetime
+import uuid
+import json
+
+import pytz
+import psycopg2.extras
+from psycopg2.extensions import quote_ident
+from tap_tester.scenario import (SCENARIOS)
+import tap_tester.connections as connections
+import tap_tester.menagerie as menagerie
+import tap_tester.runner as runner
+
+import db_utils # pylint: disable=import-error
+
+
+expected_schemas =
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
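The lsn assertions in this test treat the replication slot's log sequence number as a value that only moves forward. As background (a reader's sketch, not part of the patch), Postgres writes an LSN as two hexadecimal words separated by a slash, and that textual form maps onto the single integer ordering that comparisons such as `lsn_2 >= lsn_1` rely on:

# Illustrative helper only; the test reads the already-recorded 'lsn' value
# straight out of the Stitch bookmark rather than parsing pg_lsn text.
def lsn_to_int(lsn_text):
    """Convert a textual LSN such as '16/B374D848' into one 64-bit integer."""
    high, low = lsn_text.split('/')
    return (int(high, 16) << 32) + int(low, 16)

assert lsn_to_int('16/B374D848') >= lsn_to_int('16/B374D100')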
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
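One note on the special numeric values written by the UPDATE statement above: strict JSON has no encoding for NaN or Infinity, which is presumably why expected_updated_rec below expects None for our_decimal, our_real and our_double rather than the literals that were written to Postgres. A minimal stand-alone illustration (not part of the patch):

import json

# A spec-compliant emitter (allow_nan=False) cannot serialize these values,
# so the only faithful JSON representation of them is null.
for special in (float('nan'), float('inf'), float('-inf')):
    try:
        json.dumps(special, allow_nan=False)
    except ValueError:
        print("{} has no strict-JSON representation".format(special))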
+
+        # verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1)
+
+        # record will be the new update
+        update_message = records_by_stream['postgres_logical_replication_test']['messages'][0]
+        self.assertEqual(update_message['action'], 'upsert')
+
+        expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED',
+                                'id' : 1,
+                                'our_varchar_10' : "varchar_10",
+                                'our_text' : "some text",
+                                'our_integer' : 44100,
+                                'our_smallint' : 1,
+                                'our_bigint' : 1000000,
+                                'our_decimal' : None,
+                                'OUR TS': '1997-02-02T02:02:02.722184+00:00',
+                                'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00',
+                                'OUR TIME' : '12:11:10',
+                                'OUR TIME TZ' : '12:11:10-04:00',
+                                'OUR DATE': '1998-03-04T00:00:00+00:00',
+                                'our_double' : None,
+                                'our_real' : None,
+                                'our_boolean' : True,
+                                'our_bit' : False,
+                                'our_json' : '{"secret": 55}',
+                                'our_jsonb' : self.rec_1['our_jsonb'],
+                                'our_uuid' : self.rec_1['our_uuid'],
+                                '_sdc_deleted_at' : None,
+                                'our_store' : {'name' : 'betty', 'size' : 'small' },
+                                'our_citext': 'maGICKal',
+                                'our_cidr': self.rec_1['our_cidr'],
+                                'our_inet': self.rec_1['our_inet'],
+                                'our_mac': self.rec_1['our_mac'],
+                                'our_alignment_enum' : 'bad',
+                                'our_money' : '$56.81'
+                                }
+
+        self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()),
+                         msg="keys for expected_updated_rec are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys()))))
+
+        for k, v in update_message['data'].items():
+            self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k))
+
+        print("updated record is correct")
+
+        # check state again
+        state = menagerie.get_state(conn_id)
+        chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test']
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+        self.assertIsNotNone(chicken_bookmark['lsn'],
+                             msg="expected bookmark for stream public-postgres_logical_replication_test to have an lsn")
+        lsn_3 = chicken_bookmark['lsn']
+        self.assertTrue(lsn_3 >= lsn_2)
+
+        # table_version does NOT change
+        self.assertEqual(chicken_bookmark['version'], table_version,
+                         msg="expected bookmark for stream public-postgres_logical_replication_test to match version")
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
+
+        # verify that persisted streams have the correct properties
+        chicken_catalog = found_catalogs[0]
+
+        self.assertEqual('chicken_view', chicken_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        print('checking discovered metadata for chicken_view')
+        md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata']
+
+        self.assertEqual(
+            {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []},
+             ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True},
+             ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
+             ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True},
+             ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
+             ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}},
+            metadata.to_map(md))
+
+        # 'id' selected as view-key-properties
+        replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}]
+
+        connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog,
+                                                           menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']),
+                                                           replication_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+
+        # verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        record_count_by_stream = runner.examine_target_output_file(self,
+                                                                   conn_id,
+                                                                   self.expected_sync_streams(),
+                                                                   self.expected_pks())
+
+        self.assertEqual(record_count_by_stream, { 'chicken_view': 1})
+        records_by_stream = runner.get_records_from_target_output()
+
+        table_version = records_by_stream['chicken_view']['table_version']
+        self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version')
+        self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version')
+
+        # verifications about individual records
+        for stream, recs in records_by_stream.items():
+            # verify the persisted schema was correct
+            self.assertEqual(recs['schema'],
+                             expected_schemas[stream],
+                             msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))
+
+        actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data']
+
+        expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'}
+        self.assertEqual(actual_chicken_record,
+                         expected_chicken_record,
+                         msg="Expected `chicken_view` upsert record data to be {}, but target output was {}".format(expected_chicken_record, actual_chicken_record))
+
+        print("records are correct")
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+
+        chicken_bookmark = state['bookmarks']['postgres-public-chicken_view']
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+        self.assertEqual(chicken_bookmark['version'], table_version,
+                         msg="expected bookmark for stream postgres-public-chicken_view to match version")
+
+ 
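For readers tracing where expected_chicken_record above comes from, here is a small sketch (not part of the patch) of the join that chicken_view performs over the two fixture rows inserted in setUp:

# chicken_view is "SELECT * FROM <table 1> JOIN <table 2> ON id = fk_id".
row_table_1 = {'id': 1, 'name': 'fred', 'size': 'big'}   # postgres_views_full_table_replication_test
row_table_2 = {'fk_id': 1, 'age': 99}                    # postgres_views_full_table_replication_test_2
joined_row = dict(row_table_1, **row_table_2)            # the single row the view returns
assert joined_row == {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size': 'big'}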
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOs + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default to 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datetimes + TODOs + - Test values with second, millisecond and microsecond precision + + Boolean + TODOs + - Enter all acceptable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without errors and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the expected format (<database>-<schema>-<table>) for each stream. + - Verify the catalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the expected format (<database>-<schema>-<table>) for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the catalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
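+ # (Illustrative note, not current tap behavior: if replication keys were also marked
+ # automatic, the automatic-fields check below would compare actual_automatic_fields
+ # against expected_primary_keys | expected_replication_keys rather than primary keys alone.)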
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
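# A minimal worked example of the timestamp normalization that expected_ts_tz
# performs above, using a hypothetical sample value (datetime and pytz are
# already imported at the top of this module): localize the naive timestamp in
# America/New_York, convert it to UTC, and format it with an explicit +00:00
# offset for comparison against the replicated record.
sample_ts = pytz.timezone('America/New_York').localize(
    datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))
sample_ts_utc = sample_ts.astimezone(pytz.utc)
# EST is UTC-5 in February, so 02:02:02 local becomes 07:02:02 UTC.
assert datetime.datetime.strftime(
    sample_ts_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") == '1997-02-02T07:02:02.722184+00:00'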
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after vairous manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + 
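# A compact restatement of the NB breakdown above (illustrative only, not used
# by the test), mapping expected_records indexes to primary keys and to their
# fate before sync 3; the four surviving rows are asserted in the order
# 2, 3, 0, 4 immediately above and below.
record_fates_before_sync_3 = {
    0: ('id 1', 'updated',               'replicated'),
    1: ('id 2', 'deleted',               'not replicated'),
    2: ('id 3', 'untouched',             'replicated'),
    3: ('id 4', 'inserted',              'replicated'),
    4: ('id 5', 'inserted then updated', 'replicated'),
    5: ('id 6', 'inserted then deleted', 'not replicated'),
}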
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
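# A minimal sketch of the selection rule the next sync's assertions rely on
# (an illustration of the behavior this test observes, not the tap's
# implementation): incremental replication emits rows whose replication-key
# value is greater than or equal to the saved bookmark, in ascending
# replication-key order, so the bookmarked row itself is re-emitted while rows
# that fall below the bookmark are skipped.
def _rows_selected_by_incremental_sync(rows, replication_key, bookmark_value):
    return sorted(
        (row for row in rows if row[replication_key] >= bookmark_value),
        key=lambda row: row[replication_key],
    )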
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #--------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
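+                # NOTE: the inserted dicts double as the expected upsert payloads, so each is
+                # patched with the values the target should add: the id assigned by the SERIAL
+                # primary key and the _sdc_deleted_at metadata column, which stays None here
+                # (a value is only expected when a delete is replicated). For example,
+                # cows_rec_2 should come back as (illustrative; key order is irrelevant):
+                #
+                #     {'id': 2, 'cow_name': 'betty cow', 'cow_age': 21, '_sdc_deleted_at': None}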
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
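The block above repeats the same bookmark pattern after every sync: currently_syncing must be cleared, the stream's LSN must be present and must never move backwards, and the table version must stay fixed while LOG_BASED replication is active. A minimal standalone sketch of that check follows; the helper name and the state shape passed to it are illustrative only, not part of the test suite or tap_tester API.

# Hypothetical helper mirroring the bookmark assertions repeated above.
def assert_bookmark_progressed(prev_lsn, table_version, state,
                               stream='dev-public-postgres_logical_replication_test'):
    """Verify the bookmark only moved forward and return the new LSN."""
    bookmark = state['bookmarks'][stream]
    assert state['currently_syncing'] is None        # nothing left mid-sync
    assert bookmark['lsn'] is not None               # slot position was recorded
    assert bookmark['lsn'] >= prev_lsn               # LSN is monotonically increasing
    assert bookmark['version'] == table_version      # table was not re-versioned
    return bookmark['lsn']

# usage, assuming `state = menagerie.get_state(conn_id)` as in the test above:
# lsn_5 = assert_bookmark_progressed(lsn_4, table_version, state)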
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
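The expected record above encodes several type coercions the target output is checked against: the HSTORE literal 'jumps=>"high",name=>"betty"' comes back as a dict, money '$412.1234' is rounded to '$412.12', and the /32 inet value loses its prefix length. The sketch below shows only the HSTORE mapping, with a toy parser that handles nothing beyond the simple, unescaped literals used in these fixtures; it is an illustration of the expectation, not how the tap itself parses HSTORE.

def parse_hstore_literal(text):
    """Parse a simple key=>"value" HSTORE literal into a dict (fixture-grade only)."""
    result = {}
    for pair in text.split(','):
        key, _, value = pair.partition('=>')
        result[key.strip()] = value.strip().strip('"')
    return result

# mirrors rec_3's 'our_store' value and the expected dict asserted above:
assert parse_hstore_literal('jumps=>"high",name=>"betty"') == {'jumps': 'high', 'name': 'betty'}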
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
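The UPDATE above deliberately writes non-finite values ('NaN' into our_decimal and our_double, '+Infinity' into our_real); the expected record that follows treats all three as None, presumably because JSON has no representation for NaN or Infinity. A small sketch of that expectation, under the assumption that non-finite numerics are nulled out rather than stringified (the helper is illustrative, not tap-postgres code):

import math

def json_safe_number(value):
    """Mirror the test's expectation: non-finite numbers surface as null."""
    if value is None or math.isnan(value) or math.isinf(value):
        return None
    return value

assert json_safe_number(float('nan')) is None
assert json_safe_number(float('+inf')) is None
assert json_safe_number(1.25) == 1.25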
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
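Because a view has no primary key of its own, the test above supplies one through stream-level metadata ('view-key-properties': ["id"]) before selecting the catalog. A hedged sketch of building that breadcrumb is below; the wrapper function is purely illustrative, while the metadata keys themselves are the ones used in the test.

def view_replication_metadata(key_properties, method="FULL_TABLE", replication_key=None):
    """Build the stream-level metadata entry used to select a keyless view."""
    return [{
        "breadcrumb": [],                              # empty breadcrumb = stream-level metadata
        "metadata": {
            "replication-key": replication_key,
            "replication-method": method,
            "view-key-properties": list(key_properties),
        },
    }]

# usage mirroring the test above:
# replication_md = view_replication_metadata(["id"])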
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point
+              when precision is explicitly stated, maximum is 1000 digits
+      TODOs
+        - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision).
+        - Cover Maximum precision and scale
+        - Cover Minimum precision and scale
+        - Cover NaN
+
+
+    Floating-Point Types
+      - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic
+      - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits
+      - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits
+      - numbers too close to zero that are not representable as distinct from zero will cause an underflow error.
+      TODOs
+        - Cover NaN, -Inf, Inf
+        -
+
+
+    Character
+      -
+      TODOs
+        - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n)
+        - VARCHAR(10485760)
+        - Generate a 1 GB string??
+
+    Binary Types
+      Bytea | binary string, sequence of octets can be written in hex or escape
+      TODOs
+        - Generate different fields for hex and escape
+
+
+    Network Address Types
+      TODOs
+        - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y'
+        - For inet/cidr 'y' will default to 32 for ipv4 and 128 for ipv6
+        - For mac do all the input formats
+          [] '08:00:2b:01:02:03'
+          [] '08-00-2b-01-02-03'
+          [] '08002b:010203'
+          [] '08002b-010203'
+          [] '0800.2b01.0203'
+          [] '08002b010203'
+
+
+    Datetimes
+      TODOs
+        - Test values with second, millisecond and microsecond precision
+
+    Boolean
+      TODOs
+        - Enter all acceptable inputs for True:
+            TRUE
+            't'
+            'true'
+            'y'
+            'yes'
+            '1'
+        - Enter all acceptable inputs for False:
+            FALSE
+            'f'
+            'false'
+            'n'
+            'no'
+            '0'
+    """
+
+    AUTOMATIC_FIELDS = "automatic"
+    REPLICATION_KEYS = "valid-replication-keys"
+    PRIMARY_KEYS = "table-key-properties"
+    FOREIGN_KEYS = "table-foreign-key-properties"
+    REPLICATION_METHOD = "forced-replication-method"
+    API_LIMIT = "max-row-limit"
+    INCREMENTAL = "INCREMENTAL"
+    FULL_TABLE = "FULL_TABLE"
+    LOG_BASED = "LOG_BASED"
+
+    UNSUPPORTED_TYPES = {
+        "BIGSERIAL",
+        "BIT VARYING",
+        "BOX",
+        "BYTEA",
+        "CIRCLE",
+        "INTERVAL",
+        "LINE",
+        "LSEG",
+        "PATH",
+        "PG_LSN",
+        "POINT",
+        "POLYGON",
+        "SERIAL",
+        "SMALLSERIAL",
+        "TSQUERY",
+        "TSVECTOR",
+        "TXID_SNAPSHOT",
+        "XML",
+    }
+    default_replication_method = ""
+
+    def tearDown(self):
+        pass
+        # with db_utils.get_test_connection(test_db) as conn:
+        #     conn.autocommit = True
+        #     with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        #         cur.execute(""" SELECT pg_drop_replication_slot('stitch') """)
+
+    def setUp(self):
+        db_utils.ensure_environment_variables_set()
+
+        db_utils.ensure_db(test_db)
+        self.maxDiff = None
+
+        with db_utils.get_test_connection(test_db) as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # db_utils.ensure_replication_slot(cur, test_db)
+
+                canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+
+                create_table_sql = """
+CREATE TABLE {} (id SERIAL PRIMARY KEY,
+                our_varchar VARCHAR,
+                our_varchar_big VARCHAR(10485760),
+                our_char CHAR,
+                our_char_big CHAR(10485760),
+                our_text TEXT,
+                our_text_2 TEXT,
+                our_integer INTEGER,
+                our_smallint SMALLINT,
+                our_bigint BIGINT,
+                our_decimal NUMERIC(12,2),
+                "OUR TS" TIMESTAMP WITHOUT TIME ZONE,
+                "OUR TS TZ" TIMESTAMP WITH TIME ZONE,
+                "OUR TIME" TIME WITHOUT TIME ZONE,
+                "OUR TIME TZ" TIME WITH TIME ZONE,
+                "OUR DATE" DATE,
+                our_double DOUBLE PRECISION,
+                our_real REAL,
+                our_boolean BOOLEAN,
+                our_bit BIT(1),
+                our_json JSON,
+                our_jsonb JSONB,
+                our_uuid UUID,
+                our_store HSTORE,
+                our_citext CITEXT,
+                our_cidr cidr,
+                our_inet inet,
+                our_mac macaddr,
+                our_alignment_enum ALIGNMENT,
+                our_money money,
+                our_bigserial BIGSERIAL,
+                invalid_bit BIT(80),
+                invalid_bit_varying BIT VARYING(80),
+                invalid_box BOX,
+                invalid_bytea BYTEA,
+                invalid_circle CIRCLE,
+                invalid_interval INTERVAL,
+                invalid_line LINE,
+                invalid_lseg LSEG,
+                invalid_path PATH,
+                invalid_pg_lsn PG_LSN,
+                invalid_point POINT,
+                invalid_polygon POLYGON,
+                our_serial SERIAL,
+                our_smallserial SMALLSERIAL,
+                invalid_tsquery TSQUERY,
+                invalid_tsvector TSVECTOR,
+                invalid_txid_snapshot TXID_SNAPSHOT,
+                invalid_xml XML)
+                """.format(canonicalized_table_name)
+
+                cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name)
+                cur.execute(create_table_sql)
+
+
+                # insert fixture data and track expected records
+                self.inserted_records = []
+                self.expected_records = []
+
+                # record 1 with minimum values
+                min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000)
+                our_tz = pytz.timezone('Singapore')  # GMT+8
+                #min_ts_tz = our_tz.localize(min_ts)  # TODO
+                # our_time = datetime.time(0, 0, 0)
+                # our_time_tz = our_time.isoformat() + "-04:00"
+                # our_date = datetime.date(1998, 3, 4)
+                min_date = datetime.date(1, 1, 1)
+                my_uuid = str(uuid.uuid1())
+                self.inserted_records.append({
+                    'id': 1,  # SERIAL PRIMARY KEY,
+                    'our_char': "a",  # CHAR,
+                    'our_varchar': "",  # VARCHAR,
+                    'our_varchar_big': "",  # VARCHAR(10485760),
+                    'our_char_big': "a",  # CHAR(10485760),
+                    'our_text': " ",  # TEXT
+                    'our_text_2': "",  # TEXT,
+                    'our_integer': -2147483648,  # INTEGER,
+                    'our_smallint': -32768,  # SMALLINT,
+                    'our_bigint': -9223372036854775808,  # BIGINT,
+                    'our_decimal': decimal.Decimal(0.000000),  # NUMERIC(12,2), # TODO
+                    quote_ident('OUR TS', cur): min_ts,  # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE,
+                    quote_ident('OUR TS TZ', cur): min_ts,  #_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE,
+                    quote_ident('OUR TIME', cur): '00:00:00',  # TIME WITHOUT TIME ZONE,
+                    quote_ident('OUR TIME TZ', cur): '00:00:00+1459',  # TIME WITH TIME ZONE,
+                    quote_ident('OUR DATE', cur): min_date,  # '4713-01-01 BC', # DATE,
+                    'our_double': None,  # DOUBLE PRECISION,
+                    'our_real': None,  # REAL, # TODO
+                    'our_boolean': False,  # BOOLEAN,
+                    'our_bit': '0',  # BIT(1),
+                    'our_json': None,  # JSON,
+                    'our_jsonb': None,  # JSONB,
+                    'our_uuid': None,  # UUID,
+                    'our_store': None,  # HSTORE,
+                    'our_citext': None,  # CITEXT,
+                    'our_cidr': '12.244.233.165/32',  # cidr,
+                    'our_inet': '12.244.233.165/32',  # inet,
+                    'our_mac': '08:00:2b:01:02:04',  #'12.244.233.165/32', # macaddr,
+                    'our_alignment_enum': None,  # ALIGNMENT,
+                    'our_money': '-$92,233,720,368,547,758.08',  # money, TODO This throws a psycopg2 error
+                    'our_bigserial': 1,  # BIGSERIAL,
+                    'invalid_bit_varying': 80 * '0',  # BIT VARYING(80),
+                    'invalid_bit': 80 * '0',  # BIT(80),
+                    'invalid_box': None,  # BOX,
+                    'invalid_bytea': "E'\\000'",  # BYTEA,
+                    'invalid_circle': None,  # CIRCLE,
+                    'invalid_interval': '-178000000 years',  # INTERVAL,
+                    'invalid_line': None,  # LINE,
+                    'invalid_lseg': None,  # LSEG,
+                    'invalid_path': None,  # PATH,
+                    'invalid_pg_lsn': None,  # PG_LSN,
+                    'invalid_point': None,  # POINT,
+                    'invalid_polygon': None,  # POLYGON,
+                    'our_serial': 1,  # SERIAL,
+                    'our_smallserial': 1,  # SMALLSERIAL,
+                    'invalid_tsquery': None,  # TSQUERY,
+                    'invalid_tsvector': None,  # TSVECTOR,
+                    'invalid_txid_snapshot': None,  # TXID_SNAPSHOT,
+                    'invalid_xml': None,  # XML)
+                })
+                self.expected_records.append({
+                    'id': 1,
+                    'our_char': "a",
+                    'our_varchar': "",
+                    'our_varchar_big': "",
+                    'our_char_big': "a" + (10485760 - 1) * " ",  # padded
+                    'our_text': self.inserted_records[-1]['our_text'],
+                    'our_text_2': self.inserted_records[-1]['our_text_2'],
+                    'our_integer': -2147483648,
+                    'our_smallint': -32768,
+                    'our_bigint': -9223372036854775808,
+                    'our_decimal': '0.000000',
+                    'OUR TS': '0001-01-01T00:00:00+00:00',
+                    'OUR TS TZ': '0001-01-01T00:00:00+00:00',
+                    'OUR TIME': '00:00:00',
+                    'OUR TIME TZ': '00:00:00+14:59',
+                    'OUR DATE': '0001-01-01T00:00:00+00:00',
+                    'our_double': None,
+                    'our_real': None,
+                    'our_boolean': self.inserted_records[-1]['our_boolean'],
+                    'our_bit': False,
+                    'our_json': None,
+                    'our_jsonb': None,
+                    'our_uuid': None,
+                    'our_store': None,
+                    'our_citext': None,
+                    'our_cidr': self.inserted_records[-1]['our_cidr'],
+                    'our_inet': self.inserted_records[-1]['our_inet'],
+                    'our_mac': self.inserted_records[-1]['our_mac'],
+                    'our_alignment_enum': None,
+                    'our_money': self.inserted_records[-1]['our_money'],
+                    'our_bigserial': self.inserted_records[-1]['our_bigserial'],
+                    'our_serial': self.inserted_records[-1]['our_serial'],
+                    'our_smallserial': self.inserted_records[-1]['our_smallserial'],
+                })
+
+                db_utils.insert_record(cur, test_table_name, self.inserted_records[0])
+
+
+                # record 2 with maximum values
+                max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
+                # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
+                # nyc_tz = pytz.timezone('America/New_York')
+                # our_ts_tz = nyc_tz.localize(our_ts)
+                # our_time = datetime.time(12,11,10)
+                # our_time_tz = our_time.isoformat() + "-04:00"
+                max_date = datetime.date(9999, 12, 31)
+                my_uuid = str(uuid.uuid1())
+                base_string = "Bread Sticks From Olive Garden"
+                self.inserted_records.append({
+                    'id': 2147483647,  # SERIAL PRIMARY KEY,
+                    'our_char': None,  # CHAR,
+                    'our_varchar': None,  # VARCHAR,
+                    'our_varchar_big': "Bread Sticks From Olive Garden",  # VARCHAR(10485760),
+                    'our_char_big': base_string + " " * (10485760 - len(base_string)),  # CHAR(10485760),
+                    'our_text': dfr.read_in("text"),  # TEXT,
+                    'our_text_2': None,  # TEXT,
+                    'our_integer': 2147483647,  # INTEGER,
+                    'our_smallint': 32767,  # SMALLINT,
+                    'our_bigint': 9223372036854775807,  # BIGINT,
+                    'our_decimal': decimal.Decimal('9876543210.02'),  # NUMERIC(12,2), # TODO
+                    quote_ident('OUR TS', cur): max_ts,  # '9999-12-31 24:00:00.000000', # '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE,
+                    quote_ident('OUR TS TZ', cur): max_ts,  # '294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE,
+                    quote_ident('OUR TIME', cur): '23:59:59.999999',  # '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE,
+                    # '24:00:00.000000' -> 00:00:00 TODO BUG?
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+            'our_cidr': 'cidr',
+            'our_inet': 'inet',
+            'our_mac': 'macaddr',
+            'our_alignment_enum': 'alignment',
+            'our_money': 'money',
+            'invalid_bigserial': 'bigint',
+            'invalid_bit_varying': 'bit varying',
+            'invalid_box': 'box',
+            'invalid_bytea': 'bytea',
+            'invalid_circle': 'circle',
+            'invalid_interval': 'interval',
+            'invalid_line': 'line',
+            'invalid_lseg': 'lseg',
+            'invalid_path': 'path',
+            'invalid_pg_lsn': 'pg_lsn',
+            'invalid_point': 'point',
+            'invalid_polygon': 'polygon',
+            'invalid_serial': 'integer',
+            'invalid_smallserial': 'smallint',
+            'invalid_tsquery': 'tsquery',
+            'invalid_tsvector': 'tsvector',
+            'invalid_txid_snapshot': 'txid_snapshot',
+            'invalid_xml': 'xml',
+        }
+
+    @staticmethod
+    def tap_name():
+        return "tap-postgres"
+
+    @staticmethod
+    def name():
+        return "tap_tester_postgres_discovery"
+
+    @staticmethod
+    def get_type():
+        return "platform.postgres"
+
+    @staticmethod
+    def get_credentials():
+        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
+
+    def get_properties(self, original_properties=True):
+        return_value = {
+            'host' : os.getenv('TAP_POSTGRES_HOST'),
+            'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
+            'port' : os.getenv('TAP_POSTGRES_PORT'),
+            'user' : os.getenv('TAP_POSTGRES_USER'),
+            'default_replication_method' : self.FULL_TABLE,
+            'filter_dbs' : 'discovery1'
+        }
+        if not original_properties:
+            if self.default_replication_method is self.LOG_BASED:
+                return_value['wal2json_message_format'] = '1'
+
+            return_value['default_replication_method'] = self.default_replication_method
+
+        return return_value
+
+    def test_run(self):
+        """Parametrized discovery test running against each replication method."""
+
+        self.default_replication_method = self.FULL_TABLE
+        full_table_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(full_table_conn_id)
+
+        self.default_replication_method = self.INCREMENTAL
+        incremental_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(incremental_conn_id)
+
+        # NB | We are able to generate a connection and run discovery with a default replication
+        #      method of logical replication WITHOUT selecting a replication slot. This is not
+        #      ideal behavior. This BUG should not be carried over into hp-postgres, but will not
+        #      be fixed for this tap.
+        self.default_replication_method = self.LOG_BASED
+        log_based_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(log_based_conn_id)
+
+    def discovery_test(self, conn_id):
+        """
+        Basic Discovery Test for a database tap.
+
+        Test Description:
+          Ensure discovery runs without errors and generates a catalog of the expected form
+
+        Test Cases:
+        - Verify discovery generated the expected catalogs by name.
+        - Verify that the table_name is in the format for each stream.
+        - Verify the catalog is found for a given stream.
+        - Verify there is only 1 top level breadcrumb in metadata for a given stream.
+        - Verify replication key(s) match expectations for a given stream.
+        - Verify primary key(s) match expectations for a given stream.
+        - Verify the replication method matches our expectations for a given stream.
+        - Verify that only primary keys are given the inclusion of automatic in metadata
+          for a given stream.
+        - Verify expected unsupported fields are given the inclusion of unsupported in
+          metadata for a given stream.
+        - Verify that all fields for a given stream which are not unsupported or automatic
+          have inclusion of available.
+        - Verify row-count metadata matches expectations for a given stream.
+        - Verify selected metadata is None for all streams.
+        - Verify is-view metadata is False for a given stream.
+        - Verify no forced-replication-method is present in metadata for a given stream.
+        - Verify schema and db match expectations for a given stream.
+        - Verify schema types match expectations for a given stream.
+        """
+        # TODO Generate multiple tables (streams) and maybe dbs too?
+
+        # run discovery (check mode)
+        check_job_name = runner.run_check_mode(self, conn_id)
+
+        # Verify check exit codes
+        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
+        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # Verify discovery generated a catalog
+        found_catalogs = menagerie.get_catalogs(conn_id)
+        self.assertGreater(len(found_catalogs), 0)
+
+        # Verify discovery generated the expected catalogs by name
+        found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # Verify that the table_name is in the format for each stream
+        found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids)
+
+        # Test by stream
+        for stream in self.expected_check_streams():
+            with self.subTest(stream=stream):
+
+                # Verify the catalog is found for a given stream
+                catalog = next(iter([catalog for catalog in found_catalogs
+                                     if catalog["stream_name"] == stream]))
+                self.assertTrue(isinstance(catalog, dict))
+
+                # collecting expected values
+                expected_primary_keys = self.expected_primary_keys()[stream]
+                expected_replication_keys = set()
+                expected_unsupported_fields = self.expected_unsupported_fields()
+                expected_fields_to_datatypes = self.expected_schema_types()
+                expected_row_count = len(self.recs)
+
+                # collecting actual values...
+                schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id'])
+                stream_metadata = schema_and_metadata["metadata"]
+                top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []]
+                stream_properties = top_level_metadata[0]['metadata']
+                actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, []))
+                actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, []))
+                actual_replication_method = stream_properties.get(self.REPLICATION_METHOD)
+                actual_automatic_fields = set(
+                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
+                    if item.get("metadata").get("inclusion") == "automatic"
+                )
+                actual_unsupported_fields = set(
+                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
+                    if item.get("metadata").get("inclusion") == "unsupported"
+                )
+                actual_fields_to_datatypes = {
+                    item['breadcrumb'][1]: item['metadata'].get('sql-datatype')
+                    for item in stream_metadata[1:]
+                }
+
+                # Verify there is only 1 top level breadcrumb in metadata
+                self.assertEqual(1, len(top_level_metadata))
+
+                # Verify replication key(s) match expectations
+                self.assertSetEqual(
+                    expected_replication_keys, actual_replication_keys
+                )
+
+                # NB | We expect primary keys and replication keys to have inclusion automatic for
+                #      key-based incremental replication. But that is only true for primary keys here.
+                #      This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
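+                # NB | Illustrative sketch only (an assumption, not something this test asserts): the
+                #      annotated metadata parsed above is expected to be a list shaped roughly like the
+                #      entries below -- a single top-level entry (breadcrumb == []) carrying table-level
+                #      properties such as 'row-count' and 'is-view', plus one entry per column under
+                #      ['properties', <column>] carrying 'inclusion' and 'sql-datatype'. The concrete
+                #      values shown are hypothetical examples.
+                #
+                #      {'breadcrumb': [], 'metadata': {'row-count': 500, 'is-view': False, ...}}
+                #      {'breadcrumb': ['properties', 'id'],
+                #       'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer'}}
+                #      {'breadcrumb': ['properties', 'invalid_xml'],
+                #       'metadata': {'inclusion': 'unsupported', 'sql-datatype': 'xml'}}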
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_1 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_1, bookmark['version'])
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN and get the same 3 records
+        #----------------------------------------------------------------------
+
+        # run sync job 2 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_2 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('activate_version', messages[3]['action'])
+
+        # verify the new table version increased on the second sync
+        self.assertGreater(table_version_2, table_version_1)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_2, bookmark['version'])
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN following various manipulations to the data
+        #----------------------------------------------------------------------
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # NB | We will perform the following actions prior to the next sync:
+                #      [Action (EXPECTED RESULT)]
+
+                #      Insert a record
+                #      Insert a record to be updated prior to sync
+                #      Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+                #      Update an existing record
+                #      Update a newly inserted record
+
+                #      Delete an existing record
+                #      Delete a newly inserted record
+
+                # inserting...
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+                # an existing record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 1
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[0]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+                # a newly inserted record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 5
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[4]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+                # deleting
+                # an existing record
+                record_pk = 2
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+                # a newly inserted record
+                record_pk = 6
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN after various manipulations
+        #----------------------------------------------------------------------
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_3 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(4, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the new table version increased on the third sync
+        self.assertGreater(table_version_3, table_version_2)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+        # NB | This is a little tough to track mentally so here's a breakdown of
+        #      the order of operations by expected record indexes:
+
+        #      Prior to Sync 1
+        #        insert 0, 1, 2
+
+        #      Prior to Sync 2
+        #        No db changes
+
+        #      Prior to Sync 3
+        #        insert 3, 4, 5
+        #        update 0, 4
+        #        delete 1, 5
+
+        #      Resulting Synced Records: 2, 3, 0, 4
+
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+        self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+        
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+
+ # verify discovery produced (at least) 1 expected catalog
+ found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+ if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+ self.assertGreaterEqual(len(found_catalogs), 1)
+
+ # verify the tap discovered the expected streams
+ found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+ self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+ # verify that persisted streams have the correct properties
+ test_catalog = found_catalogs[0]
+ self.assertEqual(test_table_name, test_catalog['stream_name'])
+ print("discovered streams are correct")
+
+ # perform table selection
+ print('selecting {} and all fields within the table'.format(test_table_name))
+ schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+ additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+ _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+ # clear state
+ menagerie.set_state(conn_id, {})
+
+ # run sync job 1 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(3, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(4, len(messages))
+ self.assertEqual('activate_version', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+
+ # verify the persisted schema matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+ # verify replicated records match expectations
+ self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+ self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+ self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+ # verify records are in ascending order by replication-key value
+ expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+ self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+ self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+ print("records are correct")
+
+ # grab bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+ # verify state and bookmarks meet expectations
+ self.assertIsNone(state['currently_syncing'])
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertEqual(table_version, bookmark['version'])
+ self.assertEqual(expected_replication_key, bookmark['replication_key'])
+ self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
+
+
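# [editor's sketch, not part of the patch] The ordering and bookmark checks above capture the
# core INCREMENTAL invariant this test relies on: upserts arrive sorted by the replication key,
# and the saved bookmark equals the last (greatest) value emitted. A minimal, self-contained
# helper expressing the same idea; the helper name and signature are illustrative only:

def assert_incremental_sync_invariants(testcase, upsert_messages, replication_key, bookmark):
    """upsert_messages: the 'upsert' messages from one sync, in emission order."""
    values = [message['data'][replication_key] for message in upsert_messages]
    # the values are ISO-8601 UTC strings, so lexicographic order is chronological order
    testcase.assertEqual(sorted(values), values)
    # the saved bookmark should point at the greatest (last) replication-key value emitted
    testcase.assertEqual(values[-1], bookmark['replication_key_value'])

# e.g. assert_incremental_sync_invariants(self, messages[1:], expected_replication_key, bookmark)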
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
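# [editor's note, not part of the patch] The expectations checked below follow from how
# INCREMENTAL replication selects rows: only rows whose replication key is greater than or
# equal to the bookmark saved by the previous sync are re-emitted, which is why the previously
# bookmarked row itself shows up again. Purely illustrative (the tap builds its own SQL;
# 'rows' is a hypothetical list of current table rows):
#
#     kept = [r for r in rows if r['OUR TS TZ'] >= bookmark['replication_key_value']]
#
# Updates that push a key above the bookmark are picked up, updates that leave it below are
# not, and deleted rows are simply absent (this mode emits no _sdc_deleted_at).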
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #--------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
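# [editor's note, not part of the patch] Only one upsert is expected below: the bookmark from
# the previous sync is the highest replication-key value seen (the 2111 timestamp of
# expected_records[5]), so after deleting id 1 the bookmarked row is the only row left with a
# key >= the bookmark, and the delete itself produces no message in INCREMENTAL mode.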
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
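+                # The expected dicts above mirror what the target should receive for a
+                # logically replicated insert: the SERIAL primary key value Postgres
+                # assigned ('id') plus the '_sdc_deleted_at' metadata column, which
+                # stays None for inserts and is only populated when a delete is
+                # replicated (asserted in the single-table logical replication test).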
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
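+        # The lsn bookmark is expected to be monotonically non-decreasing across
+        # syncs, which is what the assertTrue(lsn_n >= lsn_m) checks above express.
+        # A minimal sketch, assuming a bookmark were ever exposed in Postgres'
+        # textual 'X/Y' pg_lsn form (not asserted to be the case here, where the
+        # values compare directly): it could be normalised to an integer first:
+        #
+        #     def lsn_to_int(lsn_text):
+        #         hi, lo = lsn_text.split('/')
+        #         return (int(hi, 16) << 32) + int(lo, 16)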
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
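+        # NOTE (editorial, illustrative only): the lsn bookmark is what lets each
+        # subsequent sync resume from the last confirmed WAL position, so the
+        # assertion above only requires it to be monotonically non-decreasing.
+        # A minimal sketch of that invariant, assuming the integer 'lsn' values
+        # already pulled out of the bookmark in this test:
+        #
+        #     def assert_lsn_not_rewound(test, earlier, later):
+        #         """Bookmarked LSNs may repeat, but must never move backwards."""
+        #         test.assertGreaterEqual(later, earlier)
+        #
+        #     assert_lsn_not_rewound(self, lsn_1, lsn_2)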
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
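+        # NOTE (editorial, illustrative only): the UPDATE above deliberately writes
+        # values that are legal in Postgres but have no JSON representation: 'NaN'
+        # for NUMERIC / DOUBLE PRECISION and '+Infinity' for REAL. That is why the
+        # expected record below carries None for our_decimal, our_real and our_double,
+        # while our_money is rounded to '$56.81' by Postgres itself on input.
+        # A quick sanity check, assuming psql access to the same 'dev' database and
+        # a default en_US monetary locale:
+        #
+        #     SELECT 'NaN'::numeric, '+Infinity'::real, '$56.811'::money;
+        #     -- NaN | Infinity | $56.81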
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
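+# NOTE (editorial, illustrative only): the message-order assertions in test_run above
+# rely on full-table syncs bracketing their upserts with two activate_version
+# messages: the first announces the new table version, and the last signals that the
+# load is complete so the target may retire rows from older versions. A minimal
+# sketch of that expectation, reusing this test's own records_by_stream shape:
+#
+#     actions = [m['action'] for m in records_by_stream['chicken_view']['messages']]
+#     assert actions == ['activate_version', 'upsert', 'activate_version']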
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
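+                # NOTE (editorial, illustrative only): updated_at lives on this backing
+                # table so it is carried through chicken_view by the SELECT * view below;
+                # the incremental run later in this test selects it as the replication
+                # key, and a view can only be replicated incrementally when that column
+                # survives the join. Sketch of the bookmark the sync should leave behind:
+                #
+                #     state['bookmarks']['postgres-public-chicken_view'] == {
+                #         'replication_key': 'updated_at',
+                #         'replication_key_value': '2111-01-01T12:12:12.222111+00:00',
+                #         'version': <table_version>,
+                #     }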
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
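+                # NOTE: PostgreSQL itself accepts '24:00:00' as a TIME literal (it is the documented
+                #       upper bound for the type), so 23:59:59.999999 is used here to sidestep the
+                #       24:00:00 -> 00:00:00 conversion flagged in the TODO above.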
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
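+        # NOTE: the .get(key, "MISSING FIELD") lookups below make a column that is absent from the
+        #       synced record fail the comparison with an explicit sentinel instead of raising KeyError.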
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+            'our_cidr': 'cidr',
+            'our_inet': 'inet',
+            'our_mac': 'macaddr',
+            'our_alignment_enum': 'alignment',
+            'our_money': 'money',
+            'invalid_bigserial': 'bigint',
+            'invalid_bit_varying': 'bit varying',
+            'invalid_box': 'box',
+            'invalid_bytea': 'bytea',
+            'invalid_circle': 'circle',
+            'invalid_interval': 'interval',
+            'invalid_line': 'line',
+            'invalid_lseg': 'lseg',
+            'invalid_path': 'path',
+            'invalid_pg_lsn': 'pg_lsn',
+            'invalid_point': 'point',
+            'invalid_polygon': 'polygon',
+            'invalid_serial': 'integer',
+            'invalid_smallserial': 'smallint',
+            'invalid_tsquery': 'tsquery',
+            'invalid_tsvector': 'tsvector',
+            'invalid_txid_snapshot': 'txid_snapshot',
+            'invalid_xml': 'xml',
+        }
+
+    @staticmethod
+    def tap_name():
+        return "tap-postgres"
+
+    @staticmethod
+    def name():
+        return "tap_tester_postgres_discovery"
+
+    @staticmethod
+    def get_type():
+        return "platform.postgres"
+
+    @staticmethod
+    def get_credentials():
+        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
+
+    def get_properties(self, original_properties=True):
+        return_value = {
+            'host' : os.getenv('TAP_POSTGRES_HOST'),
+            'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
+            'port' : os.getenv('TAP_POSTGRES_PORT'),
+            'user' : os.getenv('TAP_POSTGRES_USER'),
+            'default_replication_method' : self.FULL_TABLE,
+            'filter_dbs' : 'discovery1'
+        }
+        if not original_properties:
+            if self.default_replication_method is self.LOG_BASED:
+                return_value['wal2json_message_format'] = '1'
+
+            return_value['default_replication_method'] = self.default_replication_method
+
+        return return_value
+
+    def test_run(self):
+        """Parametrized discovery test running against each replication method."""
+
+        self.default_replication_method = self.FULL_TABLE
+        full_table_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(full_table_conn_id)
+
+        self.default_replication_method = self.INCREMENTAL
+        incremental_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(incremental_conn_id)
+
+        # NB | We are able to generate a connection and run discovery with a default replication
+        #      method of logical replication WITHOUT selecting a replication slot. This is not
+        #      ideal behavior. This BUG should not be carried over into hp-postgres, but will not
+        #      be fixed for this tap.
+        self.default_replication_method = self.LOG_BASED
+        log_based_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(log_based_conn_id)
+
+    def discovery_test(self, conn_id):
+        """
+        Basic Discovery Test for a database tap.
+
+        Test Description:
+          Ensure discovery completes with a clean exit status and generates a catalog of the expected form.
+
+        Test Cases:
+        - Verify discovery generated the expected catalogs by name.
+        - Verify that the tap_stream_id is in the <database>-<schema>-<table> format for each stream.
+        - Verify the catalog is found for a given stream.
+        - Verify there is only 1 top level breadcrumb in metadata for a given stream.
+        - Verify replication key(s) match expectations for a given stream.
+        - Verify primary key(s) match expectations for a given stream.
+        - Verify the replication method matches our expectations for a given stream.
+        - Verify that only primary keys are given the inclusion of automatic in metadata
+          for a given stream.
+        - Verify expected unsupported fields are given the inclusion of unsupported in
+          metadata for a given stream.
+        - Verify that all fields for a given stream which are not unsupported or automatic
+          have inclusion of available.
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
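+                #      Because of that, the automatic-fields assertion below only expects the
+                #      primary keys and deliberately omits replication keys.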
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
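+                    # (the three rows inserted below receive SERIAL ids 4, 5 and 6 and map to expected_records[3], [4] and [5];
+                    # because this stream uses FULL_TABLE replication, the next sync simply re-reads the whole table, so the
+                    # deletes of ids 2 and 6 further down drop those rows from the output rather than emitting delete events)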
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
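+                    # (both updates below move "OUR TS TZ" to 2021 and are mirrored into expected_records[0] and
+                    # expected_records[4], so the next full-table read picks up the new values wholesale)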
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after various manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the third sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
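For orientation before the manipulations below, here is a minimal, self-contained sketch of how a key-based INCREMENTAL bookmark is conventionally applied on the next run. The column name and timestamp values are taken from this test, but the selection logic is an illustrative assumption, not the tap-postgres implementation: rows are kept only when their replication-key value is greater than or equal to the bookmarked value and are emitted in ascending key order, which is why the bookmarked record is re-replicated while lower-key inserts and updates are not.

    # Illustrative sketch only; assumes inclusive (>=) bookmark semantics.
    bookmark = {
        'replication_key': 'OUR TS TZ',
        'replication_key_value': '1997-02-02T07:02:02.722184+00:00',  # record 3 above
    }
    rows = [
        {'id': 1, 'OUR TS TZ': '1977-03-03T08:03:03.733184+00:00'},
        {'id': 2, 'OUR TS TZ': '1987-02-02T07:02:02.722184+00:00'},
        {'id': 3, 'OUR TS TZ': '1997-02-02T07:02:02.722184+00:00'},
    ]
    key = bookmark['replication_key']
    replicated = sorted(
        (row for row in rows if row[key] >= bookmark['replication_key_value']),
        key=lambda row: row[key],
    )
    # only the bookmarked row (and anything newer) would be replicated again
    assert [row['id'] for row in replicated] == [3]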
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
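+        # (the bookmark from sync 1 is record 3's "OUR TS TZ" value; id 4 was inserted below that value,
+        # id 2 was updated below it, and id 5 was deleted, so only ids 3, 1 and 6 are expected here, in
+        # ascending replication-key order)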
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a higher replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #---------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
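+        # (key-based INCREMENTAL replication cannot observe the hard delete of id 1, so nothing is emitted
+        # for it; only the previously bookmarked row, id 6, is selected again on this sync)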
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+
+        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
+        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_cows_1 = bookmark_cows['lsn']
+        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
+
+        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
+        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_chickens_1 = bookmark_chickens['lsn']
+        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job again after adding records
+        #----------------------------------------------------------------------
+        print("inserting 2 more cows and 2 more chickens")
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor() as cur:
+                # insert another cow
+                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
+                insert_record(cur, test_table_name_cows, self.cows_rec_2)
+                # update that cow's expected values
+                self.cows_rec_2['id'] = 2
+                self.cows_rec_2['_sdc_deleted_at'] = None
+
+                # insert another chicken
+                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
+                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
+                # update that chicken's expected values
+                self.chicken_rec_2['id'] = 2
+                self.chicken_rec_2['_sdc_deleted_at'] = None
+
+                # and repeat...
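+                # Note: the inserted dicts double as the expected record payloads, so they
+                # are extended with the columns the tap adds on top of the raw insert:
+                # 'id' comes from the SERIAL primary key (these are the second rows in
+                # each table, hence id=2), and '_sdc_deleted_at' is the metadata column
+                # emitted for LOG_BASED streams (None for inserts, populated only when a
+                # row is deleted). The rec_3 inserts below follow the same pattern with id=3.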
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
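The delete scenarios in this test repeat the same three assertions after every sync (the message action is 'upsert', _sdc_deleted_at is populated, and the id matches the deleted row). A minimal sketch of a helper that could express them once, based only on the message shape shown above; assert_deleted is a hypothetical name, not something this patch defines:

    def assert_deleted(self, message, expected_id):
        # logical-replication deletes arrive as upserts carrying a populated _sdc_deleted_at
        self.assertEqual(message['action'], 'upsert')
        self.assertIsNotNone(message['data'].get('_sdc_deleted_at'))
        self.assertEqual(message['data']['id'], expected_id)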
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
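Every sync in these logical-replication tests re-asserts the same bookmark invariants: currently_syncing is None, the stream bookmark carries an lsn that never moves backwards, and the table version is unchanged. A sketch of a helper capturing that, assuming the state shape used above; assert_bookmark is a hypothetical name, not part of this patch:

    def assert_bookmark(self, state, table_version, previous_lsn):
        # the tap should not report a stream as in-flight once the sync has exited
        self.assertIsNone(state['currently_syncing'])
        bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test']
        self.assertIsNotNone(bookmark['lsn'])
        self.assertGreaterEqual(bookmark['lsn'], previous_lsn)  # lsn is monotonically non-decreasing
        self.assertEqual(bookmark['version'], table_version)    # table_version does NOT change
        return bookmark['lsn']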
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point
+                when precision is explicitly stated, maximum is 1000 digits
+      TODOs
+        - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision).
+        - Cover Maximum precision and scale
+        - Cover Minimum precision and scale
+        - Cover NaN
+
+
+    Floating-Point Types
+      - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic
+      - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits
+      - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits
+      - numbers too close to zero that are not representable as distinct from zero will cause an underflow error.
+      TODOs
+        - Cover NaN, -Inf, Inf
+        -
+
+
+    Character
+      -
+      TODOS
+        - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n)
+        - VARCHAR(10485760)
+        - Generate a 1 GB string??
+
+    Binary Types
+      Bytea | binary string, sequence of octets can be written in hex or escape
+      TODOs
+        - Generate different fields for hex and escape
+
+
+    Network Address Types
+      TODOs
+        - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y'
+        - For inet/cidr 'y' will default to 32 for ipv4 and 128 for ipv6
+        - For mac do all the input formats
+          [] '08:00:2b:01:02:03'
+          [] '08-00-2b-01-02-03'
+          [] '08002b:010203'
+          [] '08002b-010203'
+          [] '0800.2b01.0203'
+          [] '08002b010203'
+
+
+    Datetimes
+      TODOs
+        - Test values with second, millisecond and microsecond precision
+
+    Boolean
+      TODOs
+       - Enter all acceptable inputs for True:
+           TRUE
+           't'
+           'true'
+           'y'
+           'yes'
+           '1'
+       - Enter all acceptable inputs for False:
+           FALSE
+           'f'
+           'false'
+           'n'
+           'no'
+           '0'
+    """
+
+    AUTOMATIC_FIELDS = "automatic"
+    REPLICATION_KEYS = "valid-replication-keys"
+    PRIMARY_KEYS = "table-key-properties"
+    FOREIGN_KEYS = "table-foreign-key-properties"
+    REPLICATION_METHOD = "forced-replication-method"
+    API_LIMIT = "max-row-limit"
+    INCREMENTAL = "INCREMENTAL"
+    FULL_TABLE = "FULL_TABLE"
+    LOG_BASED = "LOG_BASED"
+
+    UNSUPPORTED_TYPES = {
+        "BIGSERIAL",
+        "BIT VARYING",
+        "BOX",
+        "BYTEA",
+        "CIRCLE",
+        "INTERVAL",
+        "LINE",
+        "LSEG",
+        "PATH",
+        "PG_LSN",
+        "POINT",
+        "POLYGON",
+        "SERIAL",
+        "SMALLSERIAL",
+        "TSQUERY",
+        "TSVECTOR",
+        "TXID_SNAPSHOT",
+        "XML",
+    }
+    default_replication_method = ""
+
+    def tearDown(self):
+        pass
+        # with db_utils.get_test_connection(test_db) as conn:
+        #     conn.autocommit = True
+        #     with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        #         cur.execute(""" SELECT pg_drop_replication_slot('stitch') """)
+
+    def setUp(self):
+        db_utils.ensure_environment_variables_set()
+
+        db_utils.ensure_db(test_db)
+        self.maxDiff = None
+
+        with db_utils.get_test_connection(test_db) as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # db_utils.ensure_replication_slot(cur, test_db)
+
+                canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+
+                create_table_sql = """
+CREATE TABLE {} (id SERIAL PRIMARY KEY,
+                our_varchar VARCHAR,
+                our_varchar_big VARCHAR(10485760),
+                our_char CHAR,
+                our_char_big CHAR(10485760),
+                our_text TEXT,
+                our_text_2 TEXT,
+                our_integer INTEGER,
+                our_smallint SMALLINT,
+                our_bigint BIGINT,
+                our_decimal NUMERIC(12,2),
+                "OUR TS" TIMESTAMP WITHOUT TIME ZONE,
+                "OUR TS TZ" TIMESTAMP WITH TIME ZONE,
+                "OUR TIME" TIME WITHOUT TIME ZONE,
+                "OUR TIME TZ" TIME WITH TIME ZONE,
+                "OUR DATE" DATE,
+                our_double DOUBLE PRECISION,
+                
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML)
+                })
+                self.expected_records.append({
+                    'id': 1,
+                    'our_char': "a",
+                    'our_varchar': "",
+                    'our_varchar_big': "",
+                    'our_char_big': "a" + (10485760 - 1) * " ", # padded
+                    'our_text': self.inserted_records[-1]['our_text'],
+                    'our_text_2': self.inserted_records[-1]['our_text_2'],
+                    'our_integer': -2147483648,
+                    'our_smallint': -32768,
+                    'our_bigint': -9223372036854775808,
+                    'our_decimal': '0.000000',
+                    'OUR TS': '0001-01-01T00:00:00+00:00',
+                    'OUR TS TZ': '0001-01-01T00:00:00+00:00',
+                    'OUR TIME': '00:00:00',
+                    'OUR TIME TZ': '00:00:00+14:59',
+                    'OUR DATE': '0001-01-01T00:00:00+00:00',
+                    'our_double': None,
+                    'our_real': None,
+                    'our_boolean': self.inserted_records[-1]['our_boolean'],
+                    'our_bit': False,
+                    'our_json': None,
+                    'our_jsonb': None,
+                    'our_uuid': None,
+                    'our_store': None,
+                    'our_citext': None,
+                    'our_cidr': self.inserted_records[-1]['our_cidr'],
+                    'our_inet': self.inserted_records[-1]['our_inet'],
+                    'our_mac': self.inserted_records[-1]['our_mac'],
+                    'our_alignment_enum': None,
+                    'our_money': self.inserted_records[-1]['our_money'],
+                    'our_bigserial': self.inserted_records[-1]['our_bigserial'],
+                    'our_serial': self.inserted_records[-1]['our_serial'],
+                    'our_smallserial': self.inserted_records[-1]['our_smallserial'],
+                })
+
+                db_utils.insert_record(cur, test_table_name, self.inserted_records[0])
+
+
+                # record 2 with maximum values
+                max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
+                # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
+                # nyc_tz = pytz.timezone('America/New_York')
+                # our_ts_tz = nyc_tz.localize(our_ts)
+                # our_time = datetime.time(12,11,10)
+                # our_time_tz = our_time.isoformat() + "-04:00"
+                max_date = datetime.date(9999, 12, 31)
+                my_uuid = str(uuid.uuid1())
+                base_string = "Bread Sticks From Olive Garden"
+                self.inserted_records.append({
+                    'id': 2147483647, # SERIAL PRIMARY KEY,
+                    'our_char': None, # CHAR,
+                    'our_varchar': None, # VARCHAR,
+                    'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760),
+                    'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760),
+                    'our_text': dfr.read_in("text"), # TEXT,
+                    'our_text_2': None, # TEXT,
+                    'our_integer': 2147483647, # INTEGER,
+                    'our_smallint': 32767, # SMALLINT,
+                    'our_bigint': 9223372036854775807, # BIGINT,
+                    'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO
+                    quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE,
+                    quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE,
+                    quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE,
+                    # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+            'our_cidr': 'cidr',
+            'our_inet': 'inet',
+            'our_mac': 'macaddr',
+            'our_alignment_enum': 'alignment',
+            'our_money': 'money',
+            'invalid_bigserial': 'bigint',
+            'invalid_bit_varying': 'bit varying',
+            'invalid_box': 'box',
+            'invalid_bytea': 'bytea',
+            'invalid_circle': 'circle',
+            'invalid_interval': 'interval',
+            'invalid_line': 'line',
+            'invalid_lseg': 'lseg',
+            'invalid_path': 'path',
+            'invalid_pg_lsn': 'pg_lsn',
+            'invalid_point': 'point',
+            'invalid_polygon': 'polygon',
+            'invalid_serial': 'integer',
+            'invalid_smallserial': 'smallint',
+            'invalid_tsquery': 'tsquery',
+            'invalid_tsvector': 'tsvector',
+            'invalid_txid_snapshot': 'txid_snapshot',
+            'invalid_xml': 'xml',
+        }
+
+    @staticmethod
+    def tap_name():
+        return "tap-postgres"
+
+    @staticmethod
+    def name():
+        return "tap_tester_postgres_discovery"
+
+    @staticmethod
+    def get_type():
+        return "platform.postgres"
+
+    @staticmethod
+    def get_credentials():
+        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
+
+    def get_properties(self, original_properties=True):
+        return_value = {
+            'host' : os.getenv('TAP_POSTGRES_HOST'),
+            'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
+            'port' : os.getenv('TAP_POSTGRES_PORT'),
+            'user' : os.getenv('TAP_POSTGRES_USER'),
+            'default_replication_method' : self.FULL_TABLE,
+            'filter_dbs' : 'discovery1'
+        }
+        if not original_properties:
+            if self.default_replication_method is self.LOG_BASED:
+                return_value['wal2json_message_format'] = '1'
+
+            return_value['default_replication_method'] = self.default_replication_method
+
+        return return_value
+
+    def test_run(self):
+        """Parametrized discovery test running against each replication method."""
+
+        self.default_replication_method = self.FULL_TABLE
+        full_table_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(full_table_conn_id)
+
+        self.default_replication_method = self.INCREMENTAL
+        incremental_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(incremental_conn_id)
+
+        # NB | We are able to generate a connection and run discovery with a default replication
+        #      method of logical replication WITHOUT selecting a replication slot. This is not
+        #      ideal behavior. This BUG should not be carried over into hp-postgres, but will not
+        #      be fixed for this tap.
+        self.default_replication_method = self.LOG_BASED
+        log_based_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(log_based_conn_id)
+
+    def discovery_test(self, conn_id):
+        """
+        Basic Discovery Test for a database tap.
+
+        Test Description:
+          Ensure discovery runs without errors and generates a catalog of the expected form
+
+        Test Cases:
+        - Verify discovery generated the expected catalogs by name.
+        - Verify that the table_name is in the <database>-<schema>-<table> format for each stream.
+        - Verify the catalog is found for a given stream.
+        - Verify there is only 1 top level breadcrumb in metadata for a given stream.
+        - Verify replication key(s) match expectations for a given stream.
+        - Verify primary key(s) match expectations for a given stream.
+        - Verify the replication method matches our expectations for a given stream.
+        - Verify that only primary keys are given the inclusion of automatic in metadata
+          for a given stream.
+        - Verify expected unsupported fields are given the inclusion of unsupported in
+          metadata for a given stream.
+        - Verify that all fields for a given stream which are not unsupported or automatic
+          have inclusion of available. 
+        - Verify row-count metadata matches expectations for a given stream.
+        - Verify selected metadata is None for all streams.
+        - Verify is-view metadata is False for a given stream.
+        - Verify no forced-replication-method is present in metadata for a given stream.
+        - Verify schema and db match expectations for a given stream.
+        - Verify schema types match expectations for a given stream.
+        """
+        # TODO Generate multiple tables (streams) and maybe dbs too?
+
+        # run discovery (check mode)
+        check_job_name = runner.run_check_mode(self, conn_id)
+
+        # Verify check exit codes
+        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
+        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # Verify discovery generated a catalog
+        found_catalogs = menagerie.get_catalogs(conn_id)
+        self.assertGreater(len(found_catalogs), 0)
+
+        # Verify discovery generated the expected catalogs by name
+        found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # Verify that the tap_stream_id is in the expected format for each stream
+        found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids)
+
+        # Test by stream
+        for stream in self.expected_check_streams():
+            with self.subTest(stream=stream):
+
+                # Verify the catalog is found for a given stream
+                catalog = next(iter([catalog for catalog in found_catalogs
+                                     if catalog["stream_name"] == stream]))
+                self.assertTrue(isinstance(catalog, dict))
+
+                # collecting expected values
+                expected_primary_keys = self.expected_primary_keys()[stream]
+                expected_replication_keys = set()
+                expected_unsupported_fields = self.expected_unsupported_fields()
+                expected_fields_to_datatypes = self.expected_schema_types()
+                expected_row_count = len(self.recs)
+
+                # collecting actual values...
+                schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id'])
+                stream_metadata = schema_and_metadata["metadata"]
+                top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []]
+                stream_properties = top_level_metadata[0]['metadata']
+                actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, []))
+                actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, []))
+                actual_replication_method = stream_properties.get(self.REPLICATION_METHOD)
+                actual_automatic_fields = set(
+                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
+                    if item.get("metadata").get("inclusion") == "automatic"
+                )
+                actual_unsupported_fields = set(
+                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
+                    if item.get("metadata").get("inclusion") == "unsupported"
+                )
+                actual_fields_to_datatypes = {
+                    item['breadcrumb'][1]: item['metadata'].get('sql-datatype')
+                    for item in stream_metadata[1:]
+                }
+
+                # Verify there is only 1 top level breadcrumb in metadata
+                self.assertEqual(1, len(top_level_metadata))
+
+                # Verify replication key(s) match expectations
+                self.assertSetEqual(
+                    expected_replication_keys, actual_replication_keys
+                )
+
+                # NB | We expect primary keys and replication keys to have inclusion automatic for
+                #      key-based incremental replication. But that is only true for primary keys here.
+                #      This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
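# ----------------------------------------------------------------------
# Editorial aside (illustrative sketch, not part of the patch): the comprehensions above assume
# menagerie's annotated-schema metadata is a list of {"breadcrumb": [...], "metadata": {...}}
# entries -- one empty-breadcrumb entry for the stream plus one ["properties", <field>] entry per
# column. The field names below are made up; the snippet only shows how the automatic and
# unsupported sets fall out of that shape.
sample_stream_metadata = [
    {"breadcrumb": [], "metadata": {"table-key-properties": ["id"], "row-count": 500}},
    {"breadcrumb": ["properties", "id"], "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
    {"breadcrumb": ["properties", "our_varchar"], "metadata": {"inclusion": "available", "sql-datatype": "character varying"}},
    {"breadcrumb": ["properties", "invalid_xml"], "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}},
]
automatic = {m["breadcrumb"][1] for m in sample_stream_metadata
             if m["breadcrumb"] and m["metadata"]["inclusion"] == "automatic"}
unsupported = {m["breadcrumb"][1] for m in sample_stream_metadata
               if m["breadcrumb"] and m["metadata"]["inclusion"] == "unsupported"}
assert automatic == {"id"} and unsupported == {"invalid_xml"}
# ----------------------------------------------------------------------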
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
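# ----------------------------------------------------------------------
# Editorial aside (illustrative sketch, not part of the patch): the sdc_recursive_*_array
# definitions in expected_schemas are self-referencing so that a multidimensional column such as
# our_int_array INTEGER[][] validates at any nesting depth. Assuming the third-party `jsonschema`
# package, a minimal check looks like this:
from jsonschema import validate

recursive_int_array = {
    "definitions": {
        "sdc_recursive_integer_array": {
            "type": ["null", "integer", "array"],
            "items": {"$ref": "#/definitions/sdc_recursive_integer_array"},
        }
    },
    "$ref": "#/definitions/sdc_recursive_integer_array",
}
validate(instance=[[1, 2, 3], [4, 5, 6]], schema=recursive_int_array)  # nested arrays pass
validate(instance=None, schema=recursive_int_array)                    # NULL columns pass too
# ----------------------------------------------------------------------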
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_1 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_1, bookmark['version'])
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN and get the same 3 records
+        #----------------------------------------------------------------------
+
+        # run sync job 2 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_2 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('activate_version', messages[3]['action'])
+
+        # verify the new table version increased on the second sync
+        self.assertGreater(table_version_2, table_version_1)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_2, bookmark['version'])
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN following various manipulations to the data
+        #----------------------------------------------------------------------
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # NB | We will perform the following actions prior to the next sync:
+                #      [Action (EXPECTED RESULT)]
+
+                #      Insert a record
+                #      Insert a record to be updated prior to sync
+                #      Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+                #      Update an existing record
+                #      Update a newly inserted record
+
+                #      Delete an existing record
+                #      Delete a newly inserted record
+
+                # inserting...
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+                # an existing record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 1
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[0]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+                # a newly inserted record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 5
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[4]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+                # deleting
+                # an existing record
+                record_pk = 2
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+                # a newly inserted record
+                record_pk = 6
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN after various manipulations
+        #----------------------------------------------------------------------
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_3 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(4, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the new table version increased on the third sync
+        self.assertGreater(table_version_3, table_version_2)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+        # NB | This is a little tough to track mentally so here's a breakdown of
+        #      the order of operations by expected records indexes:
+
+        #      Prior to Sync 1
+        #        insert 0, 1, 2
+
+        #      Prior to Sync 2
+        #        No db changes
+
+        #      Prior to Sync 3
+        #        insert 3, 4, 5
+        #        update 0, 4
+        #        delete 1, 5
+
+        #      Resulting Synced Records: 2, 3, 0, 4
+
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[2], messages[0]['data'])  # existing insert
+        self.assertDictEqual(self.expected_records[3], messages[1]['data'])  # new insert
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
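+        # NB | tap_stream_ids follow the <database>-<schema>-<table> convention
+        #      (e.g. 'dev-public-postgres_incremental_replication_test'), while stream_name is the
+        #      bare table name; both are compared against expectations below.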
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
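+        # NB | For reference, the bookmarked state for an INCREMENTAL stream is expected to look
+        #      roughly like the following (illustrative shape, not literal values):
+        #          {'currently_syncing': None,
+        #           'bookmarks': {'dev-public-postgres_incremental_replication_test': {
+        #               'version': <table_version>,
+        #               'replication_key': 'OUR TS TZ',
+        #               'replication_key_value': '<greatest replication-key value synced, UTC ISO 8601>'}}}
+        #      On the next sync the tap selects rows whose replication-key value is greater than or
+        #      equal to the bookmark, so the bookmarked record is re-emitted while rows below it are not.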
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
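+        # NB | Given the manipulations above, sync 2 should emit exactly three upserts in ascending
+        #      replication-key order:
+        #        1. the previously bookmarked record (id 3, key unchanged)
+        #        2. the pre-existing record updated to a higher key (id 1)
+        #        3. the newly inserted record with the highest key (id 6)
+        #      The lower-key insert (id 4), the lower-key update (id 2), and the deleted insert (id 5)
+        #      must not appear.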
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #--------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
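+        # NB | INCREMENTAL replication does not detect hard deletes, so removing record 1 only means
+        #      it is absent from this sync; the single expected upsert is the previously bookmarked
+        #      record (id 6), re-emitted because its key equals the saved bookmark.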
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
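+                # The inserted fixture dicts are augmented in place with the serial
+                # 'id' the database assigns and a null '_sdc_deleted_at' so that each
+                # dict can be compared directly (assertDictEqual) against the upsert
+                # records emitted for its logical replication stream.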
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
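+        # A minimal illustrative sketch (not part of the original test) of the
+        # convention the delete assertions above rely on: log-based deletes arrive
+        # as 'upsert' messages whose data carry the primary key and a non-null
+        # '_sdc_deleted_at', while ordinary inserts/updates carry
+        # '_sdc_deleted_at' = None.
+        #
+        #   def split_tombstones(messages):
+        #       live, deleted = [], []
+        #       for m in messages:
+        #           if m['action'] != 'upsert':
+        #               continue  # skip activate_version messages
+        #           (deleted if m['data'].get('_sdc_deleted_at') else live).append(m['data'])
+        #       return live, deleted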
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
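+        # A minimal illustrative sketch (not part of the assertions) of how the
+        # hard-coded 'OUR TS TZ' value checked above can be derived from the inserted
+        # datetime, mirroring the expected_ts_tz helper used by other tests in this patch:
+        #
+        #     our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc)
+        #     expected = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00")
+        #     # -> '1993-03-03T08:03:03.333333+00:00' for the record 3 fixture
+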
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
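+        # The UPDATE above sets our_decimal, our_real, and our_double to 'NaN' and
+        # '+Infinity'; the expected record below asserts that these surface as None,
+        # since the JSON emitted to the target cannot represent NaN or Infinity.
+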
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
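+        # For reference, the full-table view sync above is expected to emit messages in
+        # this order (illustrative shape only; version/timestamp fields omitted):
+        #
+        #     {"action": "activate_version", ...}
+        #     {"action": "upsert", "data": {"id": 1, "fk_id": 1, "name": "fred",
+        #                                   "age": 99, "size": "big"}}
+        #     {"action": "activate_version", ...}
+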
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
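+                # updated_at (TIMESTAMP WITH TIME ZONE) on this base table is the column
+                # the test later selects as the view's incremental replication key.
+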
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
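The NB note above describes the comparison strategy used by the two verification loops that follow. As a minimal, hedged sketch of that pattern (the helper name assert_record_matches and the expected/actual arguments are illustrative stand-ins, not part of this test suite), the idea is:

    # Compare one replicated record against its expected values field by field.
    # A per-field subTest keeps failure output readable even when a single value
    # is a multi-megabyte string, and the sentinel distinguishes a wrong value
    # from a field that was never replicated at all.
    def assert_record_matches(test_case, expected, actual):
        for field, expected_value in expected.items():
            with test_case.subTest(field=field):
                test_case.assertEqual(expected_value, actual.get(field, "MISSING FIELD"))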
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
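The NB note above flags that only primary keys carry the automatic inclusion here, even though replication keys should carry it as well under key-based incremental replication. As a hedged, self-contained sketch of how that inclusion check works against the breadcrumb-style metadata this test inspects (the metadata literal below is a toy example, not output captured from the tap):

    # Toy metadata in the shape the annotated schema returns; values are hypothetical.
    stream_metadata = [
        {"breadcrumb": [], "metadata": {"table-key-properties": ["id"]}},
        {"breadcrumb": ["properties", "id"], "metadata": {"inclusion": "automatic"}},
        {"breadcrumb": ["properties", "our_varchar"], "metadata": {"inclusion": "available"}},
    ]

    # Collect every field whose metadata marks it as automatic.
    automatic_fields = {
        item["breadcrumb"][1]
        for item in stream_metadata
        if item["breadcrumb"] and item["metadata"].get("inclusion") == "automatic"
    }

    # For this tap only the primary key is automatic (the behavior the NB calls a bug);
    # with replication keys also marked automatic, the expected set would grow accordingly.
    assert automatic_fields == {"id"}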
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+ # verify discovery produced (at least) 1 expected catalog
+ found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+ if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+ self.assertGreaterEqual(len(found_catalogs), 1)
+
+ # verify the tap discovered the expected streams
+ found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+ self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+ # verify that persisted streams have the correct properties
+ test_catalog = found_catalogs[0]
+ self.assertEqual(test_table_name, test_catalog['stream_name'])
+ print("discovered streams are correct")
+
+ # perform table selection
+ print('selecting {} and all fields within the table'.format(test_table_name))
+ schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+ additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+ _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+ # clear state
+ menagerie.set_state(conn_id, {})
+
+ # run sync job 1 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_pks()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version_1 = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(3, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(5, len(messages))
+ self.assertEqual('activate_version', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+ self.assertEqual('activate_version', messages[4]['action'])
+
+ # verify the persisted schema matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+ # verify replicated records match expectations
+ self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+ self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+ self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+ print("records are correct")
+
+ # grab bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+ # verify state and bookmarks meet expectations
+ self.assertIsNone(state['currently_syncing'])
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertIsNone(bookmark.get('replication_key'))
+ self.assertIsNone(bookmark.get('replication_key_value'))
+ self.assertEqual(table_version_1, bookmark['version'])
+
+ #----------------------------------------------------------------------
+ # invoke the sync job AGAIN and get the same 3 records
+ #----------------------------------------------------------------------
+
+ # run sync job 2 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_pks()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version_2 = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(3, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(4, len(messages))
+ self.assertEqual('upsert', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('activate_version', messages[3]['action'])
+
+ # verify the new table version increased on the second sync
+ self.assertGreater(table_version_2, table_version_1)
+
+ # verify the persisted schema still matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+ # verify replicated records still match expectations
+ self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+ self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+ self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+ # grab bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+ # verify state and bookmarks meet expectations
+ self.assertIsNone(state['currently_syncing'])
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertIsNone(bookmark.get('replication_key'))
+ self.assertIsNone(bookmark.get('replication_key_value'))
+ self.assertEqual(table_version_2, bookmark['version'])
+
+
+ #----------------------------------------------------------------------
+ # invoke the sync job AGAIN following various manipulations to the data
+ #----------------------------------------------------------------------
+
+ with db_utils.get_test_connection('dev') as conn:
+ conn.autocommit = True
+ with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+ # NB | We will perform the following actions prior to the next sync:
+ # [Action (EXPECTED RESULT)]
+
+ # Insert a record
+ # Insert a record to be updated prior to sync
+ # Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+ # Update an existing record
+ # Update a newly inserted record
+
+ # Delete an existing record
+ # Delete a newly inserted record
+
+ # inserting...
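+ # NB | The fixture values below rely on Postgres' own coercions, which the
+ #      expected records mirror: the hstore literal 'dances=>"floor",name=>"betty"'
+ #      is replicated as the object {"name": "betty", "dances": "floor"}, and the
+ #      money input '$0.98789' comes back rounded to '$0.99'.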
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
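+ # NB | The updates below touch ids 1 and 5. Under FULL_TABLE replication every
+ #      row is re-read on the next sync, so the updated values are expected in
+ #      that sync's upserts (see the assertions against expected_records[0] and [4]).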
+ # an existing record
+ canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+ record_pk = 1
+ our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+ our_ts_tz = nyc_tz.localize(our_ts)
+ updated_data = {
+ "OUR TS TZ": our_ts_tz,
+ "our_double": decimal.Decimal("6.6"),
+ "our_money": "$0.00"
+ }
+ self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+ self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+ self.expected_records[0]["our_money"] = "$0.00"
+
+ db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+ # a newly inserted record
+ canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+ record_pk = 5
+ our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+ our_ts_tz = nyc_tz.localize(our_ts)
+ updated_data = {
+ "OUR TS TZ": our_ts_tz,
+ "our_double": decimal.Decimal("6.6"),
+ "our_money": "$0.00"
+ }
+ self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+ self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+ self.expected_records[4]["our_money"] = "$0.00"
+
+ db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+ # deleting
+ # an existing record
+ record_pk = 2
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ # a newly inserted record
+ record_pk = 6
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ #----------------------------------------------------------------------
+ # invoke the sync job AGAIN after various manipulations
+ #----------------------------------------------------------------------
+
+ # run sync job 3 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_pks()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version_3 = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(4, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(5, len(messages))
+ self.assertEqual('upsert', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+ self.assertEqual('activate_version', messages[4]['action'])
+
+ # verify the new table version increased on the third sync
+ self.assertGreater(table_version_3, table_version_2)
+
+ # verify the persisted schema still matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+ # NB | This is a little tough to track mentally so here's a breakdown of
+ # the order of operations by expected records indexes:
+
+ # Prior to Sync 1
+ # insert 0, 1, 2
+
+ # Prior to Sync 2
+ # No db changes
+
+ # Prior to Sync 3
+ # insert 3, 4, 5
+ # update 0, 4
+ # delete 1, 5
+
+ # Resulting Synced Records: 2, 3, 0, 4
+
+
+ # verify replicated records still match expectations
+ self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+ self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
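+ # NB | The table is selected below with replication-method INCREMENTAL and
+ #      replication-key 'OUR TS TZ'; later syncs only pick up rows whose
+ #      replication-key value is at or above the saved bookmark.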
+
+ # verify discovery produced (at least) 1 expected catalog
+ found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+ if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+ self.assertGreaterEqual(len(found_catalogs), 1)
+
+ # verify the tap discovered the expected streams
+ found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+ self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+ # verify that persisted streams have the correct properties
+ test_catalog = found_catalogs[0]
+ self.assertEqual(test_table_name, test_catalog['stream_name'])
+ print("discovered streams are correct")
+
+ # perform table selection
+ print('selecting {} and all fields within the table'.format(test_table_name))
+ schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+ additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+ _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+ # clear state
+ menagerie.set_state(conn_id, {})
+
+ # run sync job 1 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(3, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(4, len(messages))
+ self.assertEqual('activate_version', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+
+ # verify the persisted schema matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+ # verify replicated records match expectations
+ self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+ self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+ self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+ # verify records are in ascending order by replication-key value
+ expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+ self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+ self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+ print("records are correct")
+
+ # grab bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+ # verify state and bookmarks meet expectations
+ self.assertIsNone(state['currently_syncing'])
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertEqual(table_version, bookmark['version'])
+ self.assertEqual(expected_replication_key, bookmark['replication_key'])
+ self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
+
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
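+ # NB | Sync 2 should upsert, in ascending 'OUR TS TZ' order: the previously
+ #      bookmarked record (expected_records[2]), the updated record with id 1
+ #      (expected_records[0]), and the new record with id 6 (expected_records[5]).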
+
+ # verify the first record was the bookmarked record from the previous sync
+ self.assertDictEqual(self.expected_records[2], messages[1]['data'])
+
+ # verify the expected updated record with a higher replication-key value was replicated
+ self.assertDictEqual(self.expected_records[0], messages[2]['data'])
+
+ # verify the expected inserted record with a lower replication-key value was NOT replicated
+ actual_record_ids = [message['data']['id'] for message in messages[1:]]
+ expected_record_id = self.expected_records[3]['id']
+ self.assertNotIn(expected_record_id, actual_record_ids)
+
+ # verify the deleted record with a higher replication-key value was NOT replicated
+ expected_record_id = self.expected_records[4]['id']
+ self.assertNotIn(expected_record_id, actual_record_ids)
+
+ # verify the expected updated record with a lower replication-key value was NOT replicated
+ expected_record_id = self.expected_records[1]['id']
+ self.assertNotIn(expected_record_id, actual_record_ids)
+
+ # verify the expected inserted record with a higher replication-key value was replicated
+ self.assertDictEqual(self.expected_records[5], messages[3]['data'])
+
+ # verify records are in ascending order by replication-key value
+ self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+ self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+ print("records are correct")
+
+ # get bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+ # verify the bookmarked state matches our expectations
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertEqual(bookmark['version'], table_version)
+ self.assertEqual(bookmark['replication_key'], expected_replication_key)
+ self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key])
+
+ #----------------------------------------------------------------------
+ # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
+ #----------------------------------------------------------------------
+
+ # Delete a pre-existing record from the database
+ with db_utils.get_test_connection('dev') as conn:
+ conn.autocommit = True
+ with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+ # delete a record with a lower replication-key value than the previous bookmark
+ record_pk = 1
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ # run sync job 3 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(1, record_count_by_stream[test_table_name])
+
+ # verify messages match our expectations
+ self.assertEqual(2, len(messages))
+ self.assertEqual(messages[0]['action'], 'activate_version')
+ self.assertEqual(messages[1]['action'], 'upsert')
+ self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version)
+
+ # verify replicated records meet our expectations...
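+ # NB | Only the record bookmarked after sync 2 (expected_records[5], id 6) should
+ #      be re-emitted here; deleting id 1 produces no record under INCREMENTAL replication.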
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
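+                # Illustrative sketch only (nothing beyond what the insert_record
+                # helper above already does): for self.cows_rec_3 it sorts the keys
+                # and builds a parameterized statement roughly equivalent to
+                #   INSERT INTO "postgres_logical_replication_test_cows"
+                #       (cow_age, cow_name) VALUES (%s, %s)
+                # with values (10, 'cindy cow'); the SERIAL id and _sdc_deleted_at
+                # are then added to the dict so it can double as the expected
+                # replicated record in the assertions below.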
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
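+        # The lsn bookmarks captured after each sync above (lsn_1 .. lsn_5) are plain
+        # integers, so the "bookmark never moves backwards" property can be checked
+        # generically. A minimal sketch of that pattern, using a hypothetical helper
+        # that is not defined in this module:
+        #
+        #     def assert_lsn_monotonic(self, lsns):
+        #         """Assert each bookmarked lsn is >= the one recorded before it."""
+        #         for earlier, later in zip(lsns, lsns[1:]):
+        #             self.assertGreaterEqual(later, earlier)
+        #
+        #     self.assert_lsn_monotonic([lsn_1, lsn_2, lsn_3, lsn_4, lsn_5])
+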
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
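+        # In the delete scenarios that follow, a deleted row is not emitted as a separate
+        # message type: it arrives as an 'upsert' whose data carries a non-null
+        # _sdc_deleted_at timestamp plus the primary key of the removed row. A minimal
+        # sketch of that check, using a hypothetical helper not defined in this module:
+        #
+        #     def assert_soft_delete(self, message, expected_id):
+        #         self.assertEqual(message['action'], 'upsert')
+        #         self.assertIsNotNone(message['data'].get('_sdc_deleted_at'))
+        #         self.assertEqual(message['data']['id'], expected_id)
+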
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
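+        # The UPDATE above deliberately writes non-finite values ('NaN', '+Infinity');
+        # the expectations below assume those surface as null (None) in the replicated
+        # record rather than as numbers. A minimal sketch of that normalization, shown
+        # for illustration only (not necessarily how the tap implements it):
+        #
+        #     import math
+        #
+        #     def finite_or_none(value):
+        #         return value if value is not None and math.isfinite(value) else None
+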
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
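+# Views are discovered with an empty 'table-key-properties' entry (see the metadata
+# assertion above), so a key has to be declared explicitly via 'view-key-properties'
+# before syncing. A minimal sketch of that selection metadata, with 'id' standing in
+# for whichever column uniquely identifies a row of the view:
+#
+#     replication_md = [{"breadcrumb": [],
+#                        "metadata": {"replication-key": None,
+#                                     "replication-method": "FULL_TABLE",
+#                                     "view-key-properties": ["id"]}}]
+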
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
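+                # updated_at lives on the base table but is exposed through the joined
+                # view created below; the sync later selects it as the replication key,
+                # so the bookmark's replication_key_value is expected to be this column's
+                # value rendered as an ISO 8601 UTC timestamp.
+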
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
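+                               # NB: the "OUR TS"/"OUR TS TZ" maximums above stop at 9999-12-31 because
+                               #     Python's datetime.MAXYEAR is 9999, even though Postgres itself accepts
+                               #     timestamps out to 294276 AD. Likewise '24:00:00' is a legal Postgres
+                               #     TIME literal, but per the TODO above it appears to come back as
+                               #     '00:00:00', so '23:59:59.999999' is used as the practical maximum here.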
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
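+        # (Each self.subTest() below reports a mismatched field as its own failure
+        #  instead of aborting on the first assertion, so a bad sync shows exactly
+        #  which columns differ without diffing the multi-megabyte char/varchar values.)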
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
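+                # (Because only primary keys end up with inclusion 'automatic' here, the
+                #  automatic-fields assertion below compares against expected_primary_keys
+                #  alone rather than primary keys plus a replication key.)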
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+ # verify discovery produced (at least) 1 expected catalog
+ found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+ if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+ self.assertGreaterEqual(len(found_catalogs), 1)
+
+ # verify the tap discovered the expected streams
+ found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+ self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+ # verify that persisted streams have the correct properties
+ test_catalog = found_catalogs[0]
+ self.assertEqual(test_table_name, test_catalog['stream_name'])
+ print("discovered streams are correct")
+
+ # perform table selection
+ print('selecting {} and all fields within the table'.format(test_table_name))
+ schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+ additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+ _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+ # clear state
+ menagerie.set_state(conn_id, {})
+
+ # run sync job 1 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_pks()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version_1 = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(3, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(5, len(messages))
+ self.assertEqual('activate_version', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+ self.assertEqual('activate_version', messages[4]['action'])
+
+ # verify the persisted schema matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+ # verify replicated records match expectations
+ self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+ self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+ self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+ print("records are correct")
+
+ # grab bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+ # verify state and bookmarks meet expectations
+ self.assertIsNone(state['currently_syncing'])
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertIsNone(bookmark.get('replication_key'))
+ self.assertIsNone(bookmark.get('replication_key_value'))
+ self.assertEqual(table_version_1, bookmark['version'])
+
+ #----------------------------------------------------------------------
+ # invoke the sync job AGAIN and get the same 3 records
+ #----------------------------------------------------------------------
+
+ # run sync job 2 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_pks()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version_2 = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(3, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(4, len(messages))
+ self.assertEqual('upsert', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('activate_version', messages[3]['action'])
+
+ # verify the new table version increased on the second sync
+ self.assertGreater(table_version_2, table_version_1)
+
+ # verify the persisted schema still matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+ # verify replicated records still match expectations
+ self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+ self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+ self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+ # grab bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+ # verify state and bookmarks meet expectations
+ self.assertIsNone(state['currently_syncing'])
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertIsNone(bookmark.get('replication_key'))
+ self.assertIsNone(bookmark.get('replication_key_value'))
+ self.assertEqual(table_version_2, bookmark['version'])
+
+
+ #----------------------------------------------------------------------
+ # invoke the sync job AGAIN following various manipulations to the data
+ #----------------------------------------------------------------------
+
+ with db_utils.get_test_connection('dev') as conn:
+ conn.autocommit = True
+ with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+ # NB | We will perform the following actions prior to the next sync:
+ # [Action (EXPECTED RESULT)]
+
+ # Insert a record
+ # Insert a record to be updated prior to sync
+ # Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+ # Update an existing record
+ # Update a newly inserted record
+
+ # Delete an existing record
+ # Delete a newly inserted record
+
+ # inserting...
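+ # NB | The id column is SERIAL and rows 1-3 already exist, so the three rows inserted
+ # below get ids 4, 5, and 6, which line up with self.expected_records indexes 3, 4,
+ # and 5 referenced by the post-sync assertions.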
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
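+ # NB | Each update below also adjusts the matching self.expected_records entry
+ # (indexes 0 and 4) in place so the post-sync assertions see the updated values.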
+ # an existing record
+ canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+ record_pk = 1
+ our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+ our_ts_tz = nyc_tz.localize(our_ts)
+ updated_data = {
+ "OUR TS TZ": our_ts_tz,
+ "our_double": decimal.Decimal("6.6"),
+ "our_money": "$0.00"
+ }
+ self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+ self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+ self.expected_records[0]["our_money"] = "$0.00"
+
+ db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+ # a newly inserted record
+ canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+ record_pk = 5
+ our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+ our_ts_tz = nyc_tz.localize(our_ts)
+ updated_data = {
+ "OUR TS TZ": our_ts_tz,
+ "our_double": decimal.Decimal("6.6"),
+ "our_money": "$0.00"
+ }
+ self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+ self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+ self.expected_records[4]["our_money"] = "$0.00"
+
+ db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+ # deleting
+ # an existing record
+ record_pk = 2
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ # a newly inserted record
+ record_pk = 6
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ #----------------------------------------------------------------------
+ # invoke the sync job AGAIN after various manipulations
+ #----------------------------------------------------------------------
+
+ # run sync job 3 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_pks()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version_3 = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(4, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(5, len(messages))
+ self.assertEqual('upsert', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+ self.assertEqual('activate_version', messages[4]['action'])
+
+ # verify the new table version increased on the third sync
+ self.assertGreater(table_version_3, table_version_2)
+
+ # verify the persisted schema still matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+ # NB | This is a little tough to track mentally so here's a breakdown of
+ # the order of operations by expected records indexes:
+
+ # Prior to Sync 1
+ # insert 0, 1, 2
+
+ # Prior to Sync 2
+ # No db changes
+
+ # Prior to Sync 3
+ # insert 3, 4, 5
+ # update 0, 4
+ # delete 1, 5
+
+ # Resulting Synced Records: 2, 3, 0, 4
+
+
+ # verify replicated records still match expectations
+ self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+ self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(table_version, bookmark['version'])
+        self.assertEqual(expected_replication_key, bookmark['replication_key'])
+        self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
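+
+        # NB | Illustrative sketch (not an additional assertion): per the checks above,
+        #      the incremental bookmark for this stream after sync 1 should look roughly like
+        #          {'version': table_version,
+        #           'replication_key': 'OUR TS TZ',
+        #           'replication_key_value': self.expected_records[2]['OUR TS TZ']}
+        #      with no 'lsn' key, since this stream is replicated via INCREMENTAL
+        #      rather than LOG_BASED replication.
+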
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
+
+        # verify the first record was the bookmarked record from the previous sync
+        self.assertDictEqual(self.expected_records[2], messages[1]['data'])
+
+        # verify the expected updated record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[0], messages[2]['data'])
+
+        # verify the expected inserted record with a lower replication-key value was NOT replicated
+        actual_record_ids = [message['data']['id'] for message in messages[1:]]
+        expected_record_id = self.expected_records[3]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the deleted record with a higher replication-key value was NOT replicated
+        expected_record_id = self.expected_records[4]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected updated record with a lower replication-key value was NOT replicated
+        expected_record_id = self.expected_records[1]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected inserted record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[5], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # get bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify the bookmarked state matches our expectations
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(bookmark['version'], table_version)
+        self.assertEqual(bookmark['replication_key'], expected_replication_key)
+        self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key])
+
+        #----------------------------------------------------------------------
+        # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
+        #----------------------------------------------------------------------
+
+        # Delete a pre-existing record from the database
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # delete a record with a lower replication-key value than the previous bookmark
+                record_pk = 1
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(1, record_count_by_stream[test_table_name])
+
+        # verify messages match our expectations
+        self.assertEqual(2, len(messages))
+        self.assertEqual(messages[0]['action'], 'activate_version')
+        self.assertEqual(messages[1]['action'], 'upsert')
+        self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version)
+
+        # verify replicated records meet our expectations...
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
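The three-part bookmark check above (an lsn is present, the lsn never moves backwards, and the table_version is unchanged) is repeated after every sync in this test. A minimal sketch of how that pattern could be factored out, assuming only the state shape already asserted here ({'bookmarks': {<tap_stream_id>: {'lsn': ..., 'version': ...}}}); assert_lsn_advanced is a hypothetical helper name, not something added by this patch:

def assert_lsn_advanced(test, state, tap_stream_id, previous_lsn, expected_version):
    # Hypothetical helper: wraps the bookmark assertions repeated after each
    # logical-replication sync in the tests above.
    bookmark = state['bookmarks'][tap_stream_id]
    test.assertIsNotNone(bookmark['lsn'], msg="expected bookmark for stream to have an lsn")
    test.assertGreaterEqual(bookmark['lsn'], previous_lsn,
                            msg="lsn should never move backwards between syncs")
    test.assertEqual(bookmark['version'], expected_version,
                     msg="table_version should not change between syncs")
    return bookmark['lsn']

With such a helper each block above would reduce to a single call, e.g. lsn_5 = assert_lsn_advanced(self, state, 'dev-public-postgres_logical_replication_test', lsn_4, table_version).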
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
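For orientation only: this second test class is a near copy of the first, differing mainly in setting 'wal2json_message_format': '2' in get_properties. Going by the wal2json plugin's documentation rather than anything in this patch, format 1 emits one JSON document per transaction with a "change" array, while format 2 emits one object per change; the abbreviated Python literals below are illustrative of those shapes only, not payloads captured from this suite:

# Illustrative wal2json payload shapes (assumed from the plugin docs, abbreviated).
format_v1_example = {
    "change": [
        {"kind": "insert",
         "schema": "public",
         "table": "postgres_logical_replication_test",
         "columnnames": ["id", "our_varchar"],
         "columnvalues": [3, "our_varchar 3"]},
    ]
}
format_v2_example = {
    "action": "I",          # one object per change: I=insert, U=update, D=delete
    "schema": "public",
    "table": "postgres_logical_replication_test",
    "columns": [
        {"name": "id", "value": 3},
        {"name": "our_varchar", "value": "our_varchar 3"},
    ]
}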
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
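The UPDATE issued above deliberately writes values that have no JSON representation ('NaN' into the numeric and double precision columns, '+Infinity' into the real column); the expected record asserted below treats all three as null. A small illustrative sketch of that mapping, assuming the simple rule that non-finite numbers are replaced with None (the tap's actual conversion logic may differ):

import math

def jsonable_number(value):
    """Illustrative: non-finite floats cannot be serialized to JSON, so emit None instead."""
    if value is None or math.isnan(value) or math.isinf(value):
        return None
    return value

# mirrors the expectations in expected_updated_rec below
assert jsonable_number(float('nan')) is None    # our_decimal, our_double -> None
assert jsonable_number(float('+inf')) is None   # our_real -> None
assert jsonable_number(1.2) == 1.2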
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
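For reference, the three replication methods exercised by these view tests (and by the logical-replication tests earlier in this series) leave differently shaped bookmarks, which is exactly what the state assertions check: full table keeps only a table version, key-based incremental adds the replication key and its last value, and log-based adds an LSN. A sketch with placeholder numbers; the keys match what menagerie.get_state() returns in these tests, but the values are made up:

full_table_bookmark = {'version': 1614200000000}

incremental_bookmark = {'version': 1614200000000,
                        'replication_key': 'updated_at',
                        'replication_key_value': '2111-01-01T12:12:12.222111+00:00'}

log_based_bookmark = {'version': 1614200000000,
                      'lsn': 108000928}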
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
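Expanded for readability, the fixture DDL that this setUp builds across the next few statements amounts to the following (identifiers are shown unquoted here; the test itself quotes them with quote_ident and canonicalized_table_name). This expansion is illustrative only, not part of the patch:

EXPANDED_SETUP_DDL = """
CREATE TABLE public.postgres_views_full_table_replication_test (
    id SERIAL PRIMARY KEY,
    updated_at TIMESTAMP WITH TIME ZONE,
    name VARCHAR,
    size VARCHAR);

CREATE TABLE public.postgres_views_full_table_replication_test_2 (
    fk_id bigint,
    age integer);

CREATE VIEW chicken_view AS
    (SELECT *
       FROM public.postgres_views_full_table_replication_test
       JOIN public.postgres_views_full_table_replication_test_2
         ON public.postgres_views_full_table_replication_test.id =
            public.postgres_views_full_table_replication_test_2.fk_id);
"""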
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
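+        # NB | Assuming the message layout asserted above holds (activate_version, then the
+        #      two upserted records, then activate_version), messages[1] should carry the
+        #      minimum-value record and messages[2] the maximum-value record; the subTest
+        #      loops below walk each expected record field by field so that a single bad
+        #      datatype fails on its own instead of one giant assertDictEqual diff.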
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
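+                # Minimal sketch (left commented out, not part of the current assertions) of
+                # the stricter check that would apply once replication keys are also marked
+                # automatic; it only reuses the expected_* sets and actual_automatic_fields
+                # collected above.
+                # self.assertSetEqual(expected_primary_keys | expected_replication_keys,
+                #                     actual_automatic_fields)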
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
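                    # Editor's note (illustrative sketch, hedged): db_utils.update_record and
                    # db_utils.delete_record are shared helpers that are not part of this patch.
                    # For the calls below, a plausible shape for them is simply:
                    #
                    #   def update_record(cursor, table, pk, data):
                    #       sets = ", ".join("{} = %s".format(quote_ident(k, cursor)) for k in data)
                    #       cursor.execute("UPDATE {} SET {} WHERE id = %s".format(table, sets),
                    #                      list(data.values()) + [pk])
                    #
                    #   def delete_record(cursor, table, pk):
                    #       cursor.execute("DELETE FROM {} WHERE id = %s".format(table), [pk])
                    #
                    # i.e. plain UPDATE/DELETE statements keyed on the primary key, which is why
                    # each update below also patches the matching expected_records entry in place.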
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after vairous manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
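        # Editor's note: the stream identifiers checked below follow the
        # '<dbname>-<schema>-<table>' convention used for tap_stream_id, e.g.
        #   'dev' + '-' + test_schema_name + '-' + test_table_name
        # which is why expected_check_streams() returns
        # 'dev-public-postgres_incremental_replication_test'.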
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
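        # Editor's note (illustrative, not asserted here): the bookmarked
        # replication_key_value above is expected_records[2]['OUR TS TZ'], i.e. the
        # 1997-02-02 02:02:02.722184 America/New_York timestamp normalized to UTC by
        # expected_ts_tz() -> '1997-02-02T07:02:02.722184+00:00' (EST, so +5 hours;
        # summer dates would shift by 4). On the next run an INCREMENTAL sync is
        # expected to re-select only rows at or above that value, roughly:
        #
        #   SELECT * FROM "public"."postgres_incremental_replication_test"
        #    WHERE "OUR TS TZ" >= '<replication_key_value>'
        #    ORDER BY "OUR TS TZ" ASC
        #
        # so the bookmarked row is emitted again and anything older is skipped, which
        # is exactly what the manipulations and assertions below exercise.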
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
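        # Editor's note: with the previous bookmark at the 1997 'OUR TS TZ' value, the
        # three upserts below should be, in replication-key order:
        #   expected_records[2]  (the previously bookmarked row, re-selected by >=)
        #   expected_records[0]  (id 1, updated above to a 2021 replication-key value)
        #   expected_records[5]  (id 6, the new insert with a 2111 replication-key value)
        # while the 1996 insert (id 4), the row updated backwards to 1990 (id 2) and
        # the deleted insert (id 5) must not appear.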
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #--------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
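        # Editor's note: only one upsert is expected here because the bookmark now sits
        # at the 2111 'OUR TS TZ' value of expected_records[5]; the row deleted above
        # (id 1) is simply gone from the table, and every remaining row except id 6
        # carries a replication-key value below the bookmark, so an INCREMENTAL sync
        # re-selects just the previously bookmarked record.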
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
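+                        # recreate the 'stitch_postgres' slot fresh for this run; wal2json is
+                        # the logical decoding output plugin the tap reads changes from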
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
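+                # insert_record (defined above) sorts the dict keys and builds a
+                # parameterized INSERT; for cows_rec_3 below the generated SQL is roughly:
+                #   INSERT INTO "postgres_logical_replication_test_cows" ( cow_age, cow_name )
+                #   VALUES ( %s, %s )
+                # with psycopg2 binding the values, so no manual quoting is needed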
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
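# Illustrative sketch, not part of the patch: the sections above and below repeat the same
# post-sync bookmark assertions -- the LSN bookmark only ever moves forward and the
# table_version never changes. A hypothetical helper like this one captures that pattern
# using only calls already present in this test (menagerie.get_state and the bookmark layout):
def assert_lsn_bookmark_advanced(test, conn_id, previous_lsn, expected_table_version,
                                 tap_stream_id='dev-public-postgres_logical_replication_test'):
    state = menagerie.get_state(conn_id)
    test.assertIsNone(state['currently_syncing'])
    bookmark = state['bookmarks'][tap_stream_id]
    test.assertIsNotNone(bookmark['lsn'], msg="expected an lsn bookmark for {}".format(tap_stream_id))
    test.assertGreaterEqual(bookmark['lsn'], previous_lsn)         # lsn_N >= lsn_(N-1)
    test.assertEqual(bookmark['version'], expected_table_version)  # table_version does NOT change
    return bookmark['lsn']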
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
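# Illustrative sketch, not part of the patch: each section of this test repeats the same
# three steps -- run a sync, verify the exit codes, and count the emitted records. This
# hypothetical helper shows that pattern with the same runner/menagerie calls the test
# already uses; expected_counts is whatever dict the section asserts against.
def run_sync_and_verify(test, conn_id, expected_counts):
    sync_job_name = runner.run_sync_mode(test, conn_id)
    exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
    menagerie.verify_sync_exit_status(test, exit_status, sync_job_name)
    record_count_by_stream = runner.examine_target_output_file(test,
                                                               conn_id,
                                                               test.expected_sync_streams(),
                                                               test.expected_pks())
    test.assertEqual(record_count_by_stream, expected_counts)
    return runner.get_records_from_target_output()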
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
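# Illustrative note, not part of the patch: the UPDATE above deliberately writes the special
# numeric values 'NaN' and '+Infinity'; JSON has no representation for them, so the expected
# record below treats all three columns as null. A standalone restatement of that expectation
# (values mirror the test's expectations, they are not captured tap output):
nan_and_infinity_expectations = {
    'our_decimal': ('NaN', None),        # NUMERIC 'NaN'    -> null in the replicated record
    'our_real':    ('+Infinity', None),  # REAL '+Infinity' -> null in the replicated record
    'our_double':  ('NaN', None),        # DOUBLE 'NaN'     -> null in the replicated record
}
for column, (value_written, value_replicated) in nan_and_infinity_expectations.items():
    assert value_replicated is None, "{} written as {} should replicate as null".format(column, value_written)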
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
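# Illustrative sketch, not part of the patch: expected_chicken_record above mirrors the single
# row produced by the CREATE VIEW join in setUp. Querying the view directly with the same
# db_utils/psycopg2 helpers this test already imports would return that row; this standalone
# check illustrates that expectation and is not captured tap output.
with db_utils.get_test_connection() as conn:
    with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        cur.execute("SELECT id, fk_id, name, age, size FROM {}".format(quote_ident(test_view, cur)))
        row = cur.fetchone()
        assert {key: row[key] for key in ('id', 'fk_id', 'name', 'age', 'size')} == \
            {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size': 'big'}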
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOs + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default to 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datetimes + TODOs + - Test values with second, millisecond and microsecond precision + + Boolean + TODOs + - Enter all acceptable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, +
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
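+            # With two fixture records bracketed by activate_version messages, messages[1] is
+            # expected to hold the minimum-value record and messages[2] the maximum-value record
+            # inserted in setUp.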
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without errors and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the tap_stream_id is in the <database>-<schema>-<table> format for each stream. + - Verify the catalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available.
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the tap_stream_id is in the <database>-<schema>-<table> format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the catalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
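+                # Given the behavior described above, only the primary key 'id' is expected to
+                # carry inclusion == automatic below; the replication-key check above is made
+                # against an empty set.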
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
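The filtering above relies on the tap_stream_id convention used throughout these tests, '<database>-<schema>-<table>', e.g. 'dev-public-postgres_full_table_replication_array_test'. A hypothetical helper (not part of the test suite) that spells the convention out:

```python
# Hypothetical helper illustrating the tap_stream_id naming convention assumed
# by expected_check_streams() above; it is not defined anywhere in this patch.
def expected_tap_stream_id(dbname, schema, table):
    return "{}-{}-{}".format(dbname, schema, table)

assert (expected_tap_stream_id("dev", "public", "postgres_full_table_replication_array_test")
        == "dev-public-postgres_full_table_replication_array_test")
```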
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
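The fixture rec_1 above passes most array columns as Postgres array-literal strings (nested braces for multidimensional arrays, hstore pairs as quoted 'key=>value' elements), while the JSON/JSONB arrays go through psycopg2.extras.Json plus the ::json[] casts that insert_record() adds. A small, database-free illustration of how a few of those literals are expected to come back out of the sync, using values taken from this test's fixture and expectations:

```python
# Literal string handed to Postgres      ->  value expected back from the target
array_literal_roundtrip = {
    '{{0,1,1}}':                         [[False, True, True]],                     # BIT(1)[]
    '{{1,2,3},{4,5,6}}':                 [[1, 2, 3], [4, 5, 6]],                    # INTEGER[][]
    '{{"size=>small","name=>betty"}}':   [[{'size': 'small'}, {'name': 'betty'}]],  # HSTORE[]
}
for literal, expected in array_literal_roundtrip.items():
    print('{!r:40} -> {!r}'.format(literal, expected))
```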
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
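For reference, expected_ts_tz() above normalizes a timezone-aware timestamp to UTC before formatting, which is why the America/New_York fixture timestamps appear shifted in the expected records. A standalone illustration of the conversion:

```python
import datetime
import pytz

# Standalone illustration of the expected_ts_tz() conversion defined above.
nyc_tz = pytz.timezone('America/New_York')
localized = nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))
as_utc = localized.astimezone(pytz.utc)
print(datetime.datetime.strftime(as_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00"))
# 1997-02-02T07:02:02.722184+00:00  (New York is UTC-5 in February)
```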
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_1 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_1, bookmark['version'])
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN and get the same 3 records
+        #----------------------------------------------------------------------
+
+        # run sync job 2 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_2 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('activate_version', messages[3]['action'])
+
+        # verify the new table version increased on the second sync
+        self.assertGreater(table_version_2, table_version_1)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_2, bookmark['version'])
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN following various manipulations to the data
+        #----------------------------------------------------------------------
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # NB | We will perform the following actions prior to the next sync:
+                # [Action (EXPECTED RESULT)]
+
+                # Insert a record
+                # Insert a record to be updated prior to sync
+                # Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+                # Update an existing record
+                # Update a newly inserted record
+
+                # Delete an existing record
+                # Delete a newly inserted record
+
+                # inserting...
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+                # an existing record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 1
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[0]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+                # a newly inserted record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 5
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[4]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+                # deleting
+                # an existing record
+                record_pk = 2
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+                # a newly inserted record
+                record_pk = 6
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN after various manipulations
+        #----------------------------------------------------------------------
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_3 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(4, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the new table version increased on the third sync
+        self.assertGreater(table_version_3, table_version_2)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+        # NB | This is a little tough to track mentally so here's a breakdown of
+        # the order of operations by expected records indexes:
+
+        # Prior to Sync 1
+        # insert 0, 1, 2
+
+        # Prior to Sync 2
+        # No db changes
+
+        # Prior to Sync 3
+        # insert 3, 4, 5
+        # update 0, 4
+        # delete 1, 5
+
+        # Resulting Synced Records: 2, 3, 0, 4
+
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+        self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+
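The record manipulations above go through db_utils.update_record and db_utils.delete_record, which are not included in this patch excerpt. The sketch below is only an assumption of what those helpers do (an UPDATE/DELETE keyed on the serial id column), included to make the test flow easier to follow.

```python
# Hypothetical sketches of the db_utils helpers used above; the real module is not
# part of this excerpt, so the signatures and SQL here are assumptions.
def update_record(cursor, canon_table_name, record_pk, data):
    assignments = ", ".join('"{}" = %s'.format(column) for column in data)
    sql = 'UPDATE {} SET {} WHERE id = %s'.format(canon_table_name, assignments)
    cursor.execute(sql, list(data.values()) + [record_pk])

def delete_record(cursor, canon_table_name, record_pk):
    cursor.execute('DELETE FROM {} WHERE id = %s'.format(canon_table_name), [record_pk])
```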
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
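# Illustrative sketch (not taken from the tap's source): the approximate shape of the
# INCREMENTAL bookmark asserted above, plus a selection rule that matches the behavior
# the later syncs verify -- an inclusive ">=" comparison against the saved
# replication-key value, which is why the previously bookmarked record is re-emitted
# on the next run. The table_version below is a made-up placeholder.
state = {
    'currently_syncing': None,
    'bookmarks': {
        'dev-public-postgres_incremental_replication_test': {
            'version': 1614556800000,           # placeholder table_version
            'replication_key': 'OUR TS TZ',
            'replication_key_value': '1997-02-02T07:02:02.722184+00:00',
            # note: no 'lsn' key for an INCREMENTAL stream (lsn belongs to LOG_BASED)
        }
    }
}

def would_be_selected(record, bookmark):
    """True if a sync starting from `bookmark` would (re)emit `record` -- assumed inclusive."""
    value = bookmark.get('replication_key_value')
    return value is None or record[bookmark['replication_key']] >= value

bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
print(would_be_selected({'id': 3, 'OUR TS TZ': '1997-02-02T07:02:02.722184+00:00'}, bookmark))  # True
print(would_be_selected({'id': 2, 'OUR TS TZ': '1987-02-02T07:02:02.722184+00:00'}, bookmark))  # False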
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #--------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
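# Sketch under an assumption (not from the tap's source): with key-based INCREMENTAL
# replication a hard DELETE is invisible -- the row is simply gone, so nothing is
# emitted for it and the replicated records carry no _sdc_deleted_at column (that
# column only appears in the LOG_BASED streams elsewhere in this patch). Only rows
# whose replication-key value is still >= the saved bookmark are re-selected, which
# in sync 3 is just the previously bookmarked record.
bookmark_value = '2111-01-01T17:12:12.222111+00:00'   # value saved after the previous sync
surviving_rows = [                                     # a couple of the rows left after deleting id 1
    {'id': 3, 'OUR TS TZ': '1997-02-02T07:02:02.722184+00:00'},
    {'id': 6, 'OUR TS TZ': '2111-01-01T17:12:12.222111+00:00'},
]
resynced = [row for row in surviving_rows if row['OUR TS TZ'] >= bookmark_value]
print(resynced)   # only id 6, the record the bookmark already pointed at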
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
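+        # The UPDATE below deliberately writes our_decimal = 'NaN',
+        # our_real = '+Infinity' and our_double = 'NaN'; expected_updated_rec
+        # carries None for those columns, presumably because such values cannot
+        # be represented in the JSON the tap emits. Likewise our_money = '$56.811'
+        # is expected back as '$56.81' once the money type rounds it to two
+        # decimal places.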
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
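+        # With wal2json_message_format set to '2', the DELETE below is still
+        # expected to arrive as a single 'upsert' message whose data carries the
+        # deleted row's primary key and a populated '_sdc_deleted_at' timestamp,
+        # mirroring the format-1 test above.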
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
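+        # Only the updated row should be replicated in this sync: a single upsert
+        # whose data is checked below against the full post-update row image
+        # (wal2json appears to ship complete tuples for UPDATEs rather than just
+        # the changed columns).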
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
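# NOTE: the discovered metadata asserted above reports 'is-view': True with empty
# 'table-key-properties', so a view carries no primary key of its own; the test instead
# supplies 'view-key-properties' (here ["id"]) alongside the replication method when
# selecting the stream. A minimal sketch of assembling that selection metadata; the
# helper name is illustrative, and the dict shape simply mirrors the replication_md
# used in the test above:
def build_view_replication_metadata(method, view_key_properties, replication_key=None):
    """Build the breadcrumb/metadata entry used to select a view for replication."""
    return [{
        "breadcrumb": [],
        "metadata": {
            "replication-method": method,
            "replication-key": replication_key,
            "view-key-properties": view_key_properties,
        },
    }]

# e.g. full-table replication of chicken_view keyed on id:
full_table_md = build_view_replication_metadata("FULL_TABLE", ["id"])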
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
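# NOTE: updated_at is selected as the replication key for this incremental view test, and
# the naive timestamp inserted into rec_1 (2111-01-01 12:12:12.222111) is expected back,
# and bookmarked, as an RFC 3339 UTC string. A small stand-alone sketch of that conversion,
# assuming the test database session runs in UTC, as the expected values later in this
# test imply:
import datetime

naive = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111)
as_utc = naive.replace(tzinfo=datetime.timezone.utc)
assert as_utc.isoformat() == '2111-01-01T12:12:12.222111+00:00'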
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
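# NOTE: get_properties() and get_credentials() below read the connection settings from the
# environment, so this setup step has to guarantee they are present. The db_utils helper
# itself is not shown in this patch; a minimal sketch of the guarantee it needs to provide
# (variable list taken from the properties used by these tests):
import os

required_env_vars = ['TAP_POSTGRES_HOST', 'TAP_POSTGRES_DBNAME', 'TAP_POSTGRES_PORT',
                     'TAP_POSTGRES_USER', 'TAP_POSTGRES_PASSWORD']
missing = [name for name in required_env_vars if os.getenv(name) is None]
if missing:
    raise Exception("Missing test environment variables: {}".format(missing))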
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
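+            # NB | For reference when reading the value-by-value assertions below:
+            #      messages[0] and messages[-1] are the activate_version messages already
+            #      verified above, while messages[1] and messages[2] are the upserts for
+            #      the minimum-value and maximum-value fixture records, in insertion order.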
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
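+                # NB | Given the behavior described above, the automatic-fields assertion
+                #      below compares against expected_primary_keys only; no replication
+                #      keys are expected to carry inclusion=automatic for this tap.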
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+ # an existing record
+ canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+ record_pk = 1
+ our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+ our_ts_tz = nyc_tz.localize(our_ts)
+ updated_data = {
+ "OUR TS TZ": our_ts_tz,
+ "our_double": decimal.Decimal("6.6"),
+ "our_money": "$0.00"
+ }
+ self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+ self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+ self.expected_records[0]["our_money"] = "$0.00"
+
+ db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+ # a newly inserted record
+ canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+ record_pk = 5
+ our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+ our_ts_tz = nyc_tz.localize(our_ts)
+ updated_data = {
+ "OUR TS TZ": our_ts_tz,
+ "our_double": decimal.Decimal("6.6"),
+ "our_money": "$0.00"
+ }
+ self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+ self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+ self.expected_records[4]["our_money"] = "$0.00"
+
+ db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+ # deleting
+ # an existing record
+ record_pk = 2
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ # a newly inserted record
+ record_pk = 6
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ #----------------------------------------------------------------------
+ # invoke the sync job AGAIN after various manipulations
+ #----------------------------------------------------------------------
+
+ # run sync job 3 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_pks()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version_3 = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(4, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(5, len(messages))
+ self.assertEqual('upsert', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+ self.assertEqual('activate_version', messages[4]['action'])
+
+ # verify the new table version increased on the third sync
+ self.assertGreater(table_version_3, table_version_2)
+
+ # verify the persisted schema still matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+ # NB | This is a little tough to track mentally so here's a breakdown of
+ # the order of operations by expected records indexes:
+
+ # Prior to Sync 1
+ # insert 0, 1, 2
+
+ # Prior to Sync 2
+ # No db changes
+
+ # Prior to Sync 3
+ # insert 3, 4, 5
+ # update 0, 4
+ # delete 1, 5
+
+ # Resulting Synced Records: 2, 3, 0, 4
+
+
+ # verify replicated records still match expectations
+ self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+ self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
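The assertions in the next sync hinge on how an INCREMENTAL sync filters rows against the saved bookmark: only rows whose replication-key value ('OUR TS TZ') is greater than or equal to the bookmarked value are selected again, and they are emitted in ascending replication-key order, so the previously bookmarked record is always re-replicated first. A minimal sketch of that selection rule (illustrative only; the helper name is hypothetical and the real selection logic lives in tap-postgres, not in this test):

    def expected_incremental_records(rows, bookmark_value, replication_key='OUR TS TZ'):
        # Inclusive comparison: the record that set the bookmark is re-selected,
        # which is why the next sync re-emits expected_records[2] as its first upsert.
        selected = [row for row in rows if row[replication_key] >= bookmark_value]
        # Ascending replication-key order, matching the assertLess checks on
        # consecutive upsert messages.
        return sorted(selected, key=lambda row: row[replication_key])

Under that rule, the inserts, updates, and delete staged below leave exactly three upserts for the second sync: the previously bookmarked record (id 3), the updated record with id 1, and the newly inserted record with id 6.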
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
+
+ # verify the first record was the bookmarked record from the previous sync
+ self.assertDictEqual(self.expected_records[2], messages[1]['data'])
+
+ # verify the expected updated record with a higher replication-key value was replicated
+ self.assertDictEqual(self.expected_records[0], messages[2]['data'])
+
+ # verify the expected inserted record with a lower replication-key value was NOT replicated
+ actual_record_ids = [message['data']['id'] for message in messages[1:]]
+ expected_record_id = self.expected_records[3]['id']
+ self.assertNotIn(expected_record_id, actual_record_ids)
+
+ # verify the deleted record with a higher replication-key value was NOT replicated
+ expected_record_id = self.expected_records[4]['id']
+ self.assertNotIn(expected_record_id, actual_record_ids)
+
+ # verify the expected updated record with a lower replication-key value was NOT replicated
+ expected_record_id = self.expected_records[1]['id']
+ self.assertNotIn(expected_record_id, actual_record_ids)
+
+ # verify the expected inserted record with a higher replication-key value was replicated
+ self.assertDictEqual(self.expected_records[5], messages[3]['data'])
+
+ # verify records are in ascending order by replication-key value
+ self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+ self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+ print("records are correct")
+
+ # get bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+ # verify the bookmarked state matches our expectations
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertEqual(bookmark['version'], table_version)
+ self.assertEqual(bookmark['replication_key'], expected_replication_key)
+ self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key])
+
+ #----------------------------------------------------------------------
+ # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
+ #----------------------------------------------------------------------
+
+ # Delete a pre-existing record from the database
+ with db_utils.get_test_connection('dev') as conn:
+ conn.autocommit = True
+ with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+ # delete a record with a lower replication key than the previous sync
+ record_pk = 1
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ # run sync job 3 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(1, record_count_by_stream[test_table_name])
+
+ # verify messages match our expectations
+ self.assertEqual(2, len(messages))
+ self.assertEqual(messages[0]['action'], 'activate_version')
+ self.assertEqual(messages[1]['action'], 'upsert')
+ self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version)
+
+ # verify replicated records meet our expectations...
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
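+                        # create a fresh 'stitch_postgres' slot with the wal2json output plugin
+                        # (any stale slot left over from a prior run was dropped just above)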
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
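+                # a third cow and chicken follow the same pattern: insert the row, then extend the
+                # expected record with the serial 'id' and '_sdc_deleted_at': None that the
+                # log-based sync adds to each upsert (compared in the assertDictEqual calls below)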
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
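+                # check for a leftover 'stitch' replication slot from a previous run;
+                # if one exists it is dropped and then recreated with wal2json below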
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
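+        # the UPDATE below feeds 'NaN' / '+Infinity' into the numeric columns and an over-precise
+        # money value; the expected record further down treats the non-finite numbers as None and
+        # expects our_money to come back rounded to '$56.81'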
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
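+ # Under LOG_BASED replication a DELETE is delivered as an 'upsert' message whose data
+ # contains the deleted row's primary key and a non-null _sdc_deleted_at timestamp;
+ # the delete assertions below check exactly that shape.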
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
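+ # The UPDATE above writes 'NaN' into our_decimal and our_double and '+Infinity' into
+ # our_real, values that have no JSON representation, so the expected record below
+ # carries None for them; the money literal '$56.811' is expected back rounded to '$56.81'.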
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
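+ # Note: discovery reports empty table-key-properties for the view, which is why 'id' was
+ # supplied explicitly as view-key-properties in the replication metadata above.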
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
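+ # The second table carries the foreign-key side of the join; the view created below joins
+ # the two tables on id = fk_id so the 'chicken_view' stream exposes columns from both.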
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL,
+ our_boolean BOOLEAN,
+ our_bit BIT(1),
+ our_json JSON,
+ our_jsonb JSONB,
+ our_uuid UUID,
+ our_store HSTORE,
+ our_citext CITEXT,
+ our_cidr cidr,
+ our_inet inet,
+ our_mac macaddr,
+ our_alignment_enum ALIGNMENT,
+ our_money money,
+ our_bigserial BIGSERIAL,
+ invalid_bit BIT(80),
+ invalid_bit_varying BIT VARYING(80),
+ invalid_box BOX,
+ invalid_bytea BYTEA,
+ invalid_circle CIRCLE,
+ invalid_interval INTERVAL,
+ invalid_line LINE,
+ invalid_lseg LSEG,
+ invalid_path PATH,
+ invalid_pg_lsn PG_LSN,
+ invalid_point POINT,
+ invalid_polygon POLYGON,
+ our_serial SERIAL,
+ our_smallserial SMALLSERIAL,
+ invalid_tsquery TSQUERY,
+ invalid_tsvector TSVECTOR,
+ invalid_txid_snapshot TXID_SNAPSHOT,
+ invalid_xml XML)
+ """.format(canonicalized_table_name)
+
+ cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name)
+ cur.execute(create_table_sql)
+
+
+ # insert fixture data and track expected records
+ self.inserted_records = []
+ self.expected_records = []
+
+ # record 1 with minimum values
+ min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000)
+ our_tz = pytz.timezone('Singapore') # GMT+8
+ #min_ts_tz = our_tz.localize(min_ts) # TODO
+ # our_time = datetime.time(0, 0, 0)
+ # our_time_tz = our_time.isoformat() + "-04:00"
+ # our_date = datetime.date(1998, 3, 4)
+ min_date = datetime.date(1, 1, 1)
+ my_uuid = str(uuid.uuid1())
+ self.inserted_records.append({
+ 'id': 1,# SERIAL PRIMARY KEY,
+ 'our_char': "a", # CHAR,
+ 'our_varchar': "", # VARCHAR,
+ 'our_varchar_big': "", # VARCHAR(10485760),
+ 'our_char_big': "a", # CHAR(10485760),
+ 'our_text': " ", # TEXT
+ 'our_text_2': "", # TEXT,
+ 'our_integer': -2147483648, # INTEGER,
+ 'our_smallint': -32768, # SMALLINT,
+ 'our_bigint': -9223372036854775808, # BIGINT,
+ 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO
+ quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE,
+ quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE,
+ quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE,
+ quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE,
+ quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE,
+ 'our_double': None, # DOUBLE PRECISION,
+ 'our_real': None, # REAL, # TODO
+ 'our_boolean': False, # BOOLEAN,
+ 'our_bit': '0', # BIT(1),
+ 'our_json': None, # JSON,
+ 'our_jsonb': None, # JSONB,
+ 'our_uuid': None, # UUID,
+ 'our_store': None, # HSTORE,
+ 'our_citext': None, # CITEXT,
+ 'our_cidr': '12.244.233.165/32', # cidr,
+ 'our_inet': '12.244.233.165/32', # inet,
+ 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr,
+ 'our_alignment_enum': None, # ALIGNMENT,
+ 'our_money': '-$92,233,720,368,547,758.08', # money, TODO This throws psycopg error
+ 'our_bigserial': 1, # BIGSERIAL,
+ 'invalid_bit_varying': 80 * '0', # BIT VARYING(80),
+ 'invalid_bit': 80 * '0', # BIT(80),
+ 'invalid_box': None, # BOX,
+ 'invalid_bytea': "E'\\000'", # BYTEA,
+ 'invalid_circle': None, # CIRCLE,
+ 'invalid_interval': '-178000000 years', # INTERVAL,
+ 'invalid_line': None, # LINE,
+ 'invalid_lseg': None, # LSEG,
+ 'invalid_path': None, # PATH,
+ 'invalid_pg_lsn': None, # PG_LSN,
+ 'invalid_point': None, # POINT,
+ 'invalid_polygon': None, # POLYGON,
+ 'our_serial': 1, # SERIAL,
+ 'our_smallserial': 1, # SMALLSERIAL,
+ 'invalid_tsquery': None, # TSQUERY,
+ 'invalid_tsvector': None, # TSVECTOR,
+ 'invalid_txid_snapshot': None, # TXID_SNAPSHOT,
+ 'invalid_xml': None, #
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr',
+ 'our_inet': 'inet',
+ 'our_mac': 'macaddr',
+ 'our_alignment_enum': 'alignment',
+ 'our_money': 'money',
+ 'invalid_bigserial': 'bigint',
+ 'invalid_bit_varying': 'bit varying',
+ 'invalid_box': 'box',
+ 'invalid_bytea': 'bytea',
+ 'invalid_circle': 'circle',
+ 'invalid_interval': 'interval',
+ 'invalid_line': 'line',
+ 'invalid_lseg': 'lseg',
+ 'invalid_path': 'path',
+ 'invalid_pg_lsn': 'pg_lsn',
+ 'invalid_point': 'point',
+ 'invalid_polygon': 'polygon',
+ 'invalid_serial': 'integer',
+ 'invalid_smallserial': 'smallint',
+ 'invalid_tsquery': 'tsquery',
+ 'invalid_tsvector': 'tsvector',
+ 'invalid_txid_snapshot': 'txid_snapshot',
+ 'invalid_xml': 'xml',
+ }
+
+ @staticmethod
+ def tap_name():
+ return "tap-postgres"
+
+ @staticmethod
+ def name():
+ return "tap_tester_postgres_discovery"
+
+ @staticmethod
+ def get_type():
+ return "platform.postgres"
+
+ @staticmethod
+ def get_credentials():
+ return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
+
+ def get_properties(self, original_properties=True):
+ return_value = {
+ 'host' : os.getenv('TAP_POSTGRES_HOST'),
+ 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
+ 'port' : os.getenv('TAP_POSTGRES_PORT'),
+ 'user' : os.getenv('TAP_POSTGRES_USER'),
+ 'default_replication_method' : self.FULL_TABLE,
+ 'filter_dbs' : 'discovery1'
+ }
+ if not original_properties:
+ if self.default_replication_method is self.LOG_BASED:
+ return_value['wal2json_message_format'] = '1'
+
+ return_value['default_replication_method'] = self.default_replication_method
+
+ return return_value
+
+ def test_run(self):
+ """Parametrized discovery test running against each replication method."""
+
+ self.default_replication_method = self.FULL_TABLE
+ full_table_conn_id = connections.ensure_connection(self, original_properties=False)
+ self.discovery_test(full_table_conn_id)
+
+ self.default_replication_method = self.INCREMENTAL
+ incremental_conn_id = connections.ensure_connection(self, original_properties=False)
+ self.discovery_test(incremental_conn_id)
+
+ # NB | We are able to generate a connection and run discovery with a default replication
+ # method of logical replication WITHOUT selecting a replication slot. This is not
+ # ideal behavior. This BUG should not be carried over into hp-postgres, but will not
+ # be fixed for this tap.
+ self.default_replication_method = self.LOG_BASED
+ log_based_conn_id = connections.ensure_connection(self, original_properties=False)
+ self.discovery_test(log_based_conn_id)
+
+ def discovery_test(self, conn_id):
+ """
+ Basic Discovery Test for a database tap.
+
+ Test Description:
+ Ensure discovery runs without error exit codes and generates a catalog of the expected form
+
+ Test Cases:
+ - Verify discovery generated the expected catalogs by name.
+ - Verify that the table_name is in the format for each stream.
+ - Verify the catalog is found for a given stream.
+ - Verify there is only 1 top level breadcrumb in metadata for a given stream.
+ - Verify replication key(s) match expectations for a given stream.
+ - Verify primary key(s) match expectations for a given stream.
+ - Verify the replication method matches our expectations for a given stream.
+ - Verify that only primary keys are given the inclusion of automatic in metadata
+ for a given stream.
+ - Verify expected unsupported fields are given the inclusion of unsupported in
+ metadata for a given stream.
+ - Verify that all fields for a given stream which are not unsupported or automatic
+ have inclusion of available.
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_1 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_1, bookmark['version'])
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN and get the same 3 records
+        #----------------------------------------------------------------------
+
+        # run sync job 2 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_2 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('activate_version', messages[3]['action'])
+
+        # verify the new table version increased on the second sync
+        self.assertGreater(table_version_2, table_version_1)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_2, bookmark['version'])
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN following various manipulations to the data
+        #----------------------------------------------------------------------
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # NB | We will perform the following actions prior to the next sync:
+                #      [Action (EXPECTED RESULT)]
+
+                #      Insert a record
+                #      Insert a record to be updated prior to sync
+                #      Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+                #      Update an existing record
+                #      Update a newly inserted record
+
+                #      Delete an existing record
+                #      Delete a newly inserted record
+
+                # inserting...
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+                # an existing record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 1
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[0]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+                # a newly inserted record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 5
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[4]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+                # deleting
+                # an existing record
+                record_pk = 2
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+                # a newly inserted record
+                record_pk = 6
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN after various manipulations
+        #----------------------------------------------------------------------
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_3 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(4, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the new table version increased on the third sync
+        self.assertGreater(table_version_3, table_version_2)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+        # NB | This is a little tough to track mentally so here's a breakdown of
+        #      the order of operations by expected records indexes:
+
+        #      Prior to Sync 1
+        #      insert 0, 1, 2
+
+        #      Prior to Sync 2
+        #      No db changes
+
+        #      Prior to Sync 3
+        #      insert 3, 4, 5
+        #      update 0, 4
+        #      delete 1, 5
+
+        #      Resulting Synced Records: 2, 3, 0, 4
+
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+        self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+        
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(table_version, bookmark['version'])
+        self.assertEqual(expected_replication_key, bookmark['replication_key'])
+        self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
+
+        
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
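The assertions that follow lean on how INCREMENTAL replication treats the saved bookmark: the tap re-selects every row whose replication-key value is greater than or equal to the bookmark, in ascending order, so the previously bookmarked row is always emitted again. Below is a minimal, standalone sketch of that selection rule; the `incremental_sync` helper, the ids, and the UTC timestamps are illustrative stand-ins and are not part of tap-postgres or this patch.

```python
import datetime


def incremental_sync(rows, replication_key, bookmark):
    """Rows an INCREMENTAL sync is expected to emit: every row whose
    replication-key value is >= the saved bookmark, in ascending order."""
    due = [row for row in rows if bookmark is None or row[replication_key] >= bookmark]
    return sorted(due, key=lambda row: row[replication_key])


rows = [
    {'id': 3, 'OUR TS TZ': datetime.datetime(1997, 2, 2, 7, 2, 2)},     # row bookmarked by the previous sync
    {'id': 4, 'OUR TS TZ': datetime.datetime(1996, 4, 4, 9, 4, 4)},     # inserted below the bookmark
    {'id': 1, 'OUR TS TZ': datetime.datetime(2021, 4, 4, 8, 4, 4)},     # updated above the bookmark
    {'id': 6, 'OUR TS TZ': datetime.datetime(2111, 1, 1, 17, 12, 12)},  # inserted above the bookmark
]
bookmark = datetime.datetime(1997, 2, 2, 7, 2, 2)

emitted = incremental_sync(rows, 'OUR TS TZ', bookmark)
# bookmarked row first, then the two rows above the bookmark; id 4 stays out
assert [row['id'] for row in emitted] == [3, 1, 6]
```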
+
+        # verify the first record was the bookmarked record from the previous sync
+        self.assertDictEqual(self.expected_records[2], messages[1]['data'])
+
+        # verify the expected updated record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[0], messages[2]['data'])
+
+        # verify the expected inserted record with a lower replication-key value was NOT replicated
+        actual_record_ids = [message['data']['id'] for message in messages[1:]]
+        expected_record_id = self.expected_records[3]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the deleted record with a higher replication-key value was NOT replicated
+        expected_record_id = self.expected_records[4]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected updated record with a lower replication-key value was NOT replicated
+        expected_record_id = self.expected_records[1]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected inserted record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[5], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # get bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify the bookmarked state matches our expectations
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(bookmark['version'], table_version)
+        self.assertEqual(bookmark['replication_key'], expected_replication_key)
+        self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key])
+
+        #----------------------------------------------------------------------
+        # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
+        #----------------------------------------------------------------------
+
+        # Delete a pre-existing record from the database
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # delete a record with a lower replication key than the previous sync
+                record_pk = 1
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(1, record_count_by_stream[test_table_name])
+
+        # verify messages match our expectations
+        self.assertEqual(2, len(messages))
+        self.assertEqual(messages[0]['action'], 'activate_version')
+        self.assertEqual(messages[1]['action'], 'upsert')
+        self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version)
+
+        # verify replicated records meet our expectations...
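For the third sync the only change is a delete below the bookmark, so the inclusive (>=) comparison leaves exactly one qualifying row: the bookmarked record itself. The sketch below, which is not taken from the patch, shows the state shape these assertions imply; the stream id matches the test, while the version and timestamp values are made up for illustration.

```python
# Illustrative only: the bookmark the second sync is expected to leave behind.
state = {
    'currently_syncing': None,
    'bookmarks': {
        'dev-public-postgres_incremental_replication_test': {
            'version': 1618400000000,                                      # table_version; unchanged across syncs
            'replication_key': 'OUR TS TZ',
            'replication_key_value': '2111-01-01T17:12:12.222111+00:00',   # highest value seen so far
        }
    }
}

# No 'lsn' key: INCREMENTAL mode never records one. Because rows are selected with
# replication_key >= replication_key_value, sync 3 re-emits only this bookmarked row,
# and the row deleted from the source table simply stops appearing.
assert state['bookmarks']['dev-public-postgres_incremental_replication_test'].get('lsn') is None
```

This is also why hard deletes are invisible to INCREMENTAL replication; surfacing them requires LOG_BASED mode, which the later tests in this series exercise via the `_sdc_deleted_at` column.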
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+
+        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
+        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_cows_1 = bookmark_cows['lsn']
+        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
+
+        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
+        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_chickens_1 = bookmark_chickens['lsn']
+        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job again after adding records
+        #----------------------------------------------------------------------
+        print("inserting 2 more cows and 2 more chickens")
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor() as cur:
+                # insert another cow
+                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
+                insert_record(cur, test_table_name_cows, self.cows_rec_2)
+                # update that cow's expected values
+                self.cows_rec_2['id'] = 2
+                self.cows_rec_2['_sdc_deleted_at'] = None
+
+                # insert another chicken
+                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
+                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
+                # update that chicken's expected values
+                self.chicken_rec_2['id'] = 2
+                self.chicken_rec_2['_sdc_deleted_at'] = None
+
+                # and repeat...
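The `insert_record` helper defined earlier in this file turns each of these dicts into a parameterized INSERT; the expected upsert then carries two extra keys because the serial `id` is assigned by the database and LOG_BASED replication adds the `_sdc_deleted_at` metadata column. Below is a standalone sketch of what that helper produces for the cow fixture above, with no live cursor involved; plain double quotes stand in for `quote_ident`, so treat it as an approximation rather than the exact SQL sent to Postgres.

```python
# Mirrors insert_record() from this test module, minus the psycopg2 cursor.
record = {'cow_name': "betty cow", 'cow_age': 21}

keys = sorted(record.keys())                     # ['cow_age', 'cow_name']
columns_sql = ", \n ".join(keys)
value_sql = ",".join(["%s"] * len(keys))         # '%s,%s'

insert_sql = """ INSERT INTO {}
                ( {} )
                VALUES ( {} )""".format('"postgres_logical_replication_test_cows"',
                                        columns_sql, value_sql)
# cursor.execute(insert_sql, [record[k] for k in keys]) would bind the values safely.

# What the target is then expected to receive for this row:
expected_upsert = dict(record, id=2, _sdc_deleted_at=None)
assert expected_upsert == {'cow_name': 'betty cow', 'cow_age': 21,
                           'id': 2, '_sdc_deleted_at': None}
```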
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
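The bookmark checks in this test repeat the same pattern after every sync: the stream's lsn must be present, it must not move backwards relative to the previous sync, and table_version must stay unchanged. As a minimal sketch only (the helper name assert_logical_bookmark is hypothetical and is not defined anywhere in this patch), that recurring block could be factored out roughly like so:

    def assert_logical_bookmark(self, bookmark, previous_lsn, expected_version):
        # Recurring logical-replication bookmark assertions used after each sync.
        self.assertIsNotNone(bookmark['lsn'], msg="expected bookmark for stream to have an lsn")
        # the lsn only ever moves forward across successive syncs
        self.assertGreaterEqual(bookmark['lsn'], previous_lsn)
        # table_version does NOT change between incremental logical syncs
        self.assertEqual(bookmark['version'], expected_version)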
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
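The delete scenario that follows (like the ones in the format-1 test above) always verifies the same three properties of the replicated message: the action is 'upsert', the data carries the deleted row's id, and _sdc_deleted_at is populated. A hedged sketch of that check as a helper, using a hypothetical name that does not exist in this patch:

    def assert_replicated_delete(self, delete_message, expected_id):
        # A source-side DELETE surfaces as an upsert whose data is flagged with _sdc_deleted_at.
        self.assertEqual(delete_message['action'], 'upsert')
        self.assertEqual(delete_message['data']['id'], expected_id)
        self.assertIsNotNone(delete_message['data'].get('_sdc_deleted_at'))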
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
+
+        # verify that persisted streams have the correct properties
+        chicken_catalog = found_catalogs[0]
+
+        self.assertEqual('chicken_view', chicken_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        print('checking discovered metadata for chicken_view')
+        md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata']
+
+        self.assertEqual(
+            {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []},
+             ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True},
+             ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
+             ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True},
+             ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
+             ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}},
+            metadata.to_map(md))
+
+
+        # 'ID' selected as view-key-properties
+        replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}]
+
+        connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog,
+                                                           menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']),
+                                                           replication_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+
+        # verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        record_count_by_stream = runner.examine_target_output_file(self,
+                                                                    conn_id,
+                                                                    self.expected_sync_streams(),
+                                                                    self.expected_pks())
+
+
+        self.assertEqual(record_count_by_stream, { 'chicken_view': 1})
+        records_by_stream = runner.get_records_from_target_output()
+
+        table_version = records_by_stream['chicken_view']['table_version']
+        self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version')
+        self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version')
+
+        # verifications about individual records
+        for stream, recs in records_by_stream.items():
+            # verify the persisted schema was correct
+            self.assertEqual(recs['schema'],
+                             expected_schemas[stream],
+                             msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))
+
+        actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data']
+
+        expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'}
+        self.assertEqual(actual_chicken_record,
+                         expected_chicken_record,
+                         msg="Expected `chicken_view` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record))
+
+        print("records are correct")
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+
+        chicken_bookmark = state['bookmarks']['postgres-public-chicken_view']
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+        self.assertEqual(chicken_bookmark['version'], table_version,
+                         msg="expected bookmark for stream postgres-public-chicken_view to match version")
+
+
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
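+                # descriptive note: the second table supplies fk_id and age; chicken_view
+                # joins it to the first table on {table_1}.id = {table_2}.fk_id below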
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
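        # Illustrative sketch (an assumption, not part of this test as written): the
        # field-by-field comparison described in the note above could be shared by the
        # minimum- and maximum-value checks below via a small local helper. The helper
        # name and its placement here are hypothetical.
        def assert_record_matches_by_field(expected_record, actual_message):
            # Compare one expected record against one replicated upsert message,
            # surfacing each mismatched field as its own subTest failure.
            for field, expected_value in expected_record.items():
                with self.subTest(field=field):
                    self.assertEqual(expected_value,
                                     actual_message['data'].get(field, "MISSING FIELD"))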
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
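                # Illustrative sketch (an assumption, deliberately not asserted here because
                # of the inclusion bug noted above): once replication keys are also marked
                # automatic, the expectation would likely be built from both key sets, e.g.
                #
                #     expected_automatic_fields = expected_primary_keys | expected_replication_keys
                #     self.assertSetEqual(expected_automatic_fields, actual_automatic_fields)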
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
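The update and delete steps below go through db_utils helpers that are likewise not defined in this patch. canonicalized_table_name presumably mirrors the module-level versions that appear later in this series (schema and table each passed through quote_ident and joined with a dot), while update_record and delete_record presumably issue parameterized statements keyed on the serial id primary key. A rough sketch under those assumptions (the quoting of mixed-case column names in update_record is also assumed):

    from psycopg2.extensions import quote_ident

    def canonicalized_table_name(cursor, schema, table):
        # e.g. "public"."postgres_full_table_replication_test"
        return "{}.{}".format(quote_ident(schema, cursor), quote_ident(table, cursor))

    def update_record(cursor, canon_table_name, record_pk, data):
        # assumed: quote each column name since keys like "OUR TS TZ" are mixed case
        set_sql = ", ".join("{} = %s".format(quote_ident(key, cursor)) for key in data)
        update_sql = "UPDATE {} SET {} WHERE id = %s".format(canon_table_name, set_sql)
        cursor.execute(update_sql, list(data.values()) + [record_pk])

    def delete_record(cursor, canon_table_name, record_pk):
        cursor.execute("DELETE FROM {} WHERE id = %s".format(canon_table_name), [record_pk])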
+
+                # an existing record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 1
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[0]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+                # a newly inserted record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 5
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[4]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+                # deleting
+                # an existing record
+                record_pk = 2
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+                # a newly inserted record
+                record_pk = 6
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN after various manipulations
+        #----------------------------------------------------------------------
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_3 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(4, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the new table version increased on the third sync
+        self.assertGreater(table_version_3, table_version_2)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+        # NB | This is a little tough to track mentally so here's a breakdown of
+        #      the order of operations by expected records indexes:
+
+        #      Prior to Sync 1
+        #        insert 0, 1, 2
+
+        #      Prior to Sync 2
+        #        No db changes
+
+        #      Prior to Sync 3
+        #        insert 3, 4, 5
+        #        update 0, 4
+        #        delete 1, 5
+
+        #      Resulting Synced Records: 2, 3, 0, 4
+
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+        self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
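The catalog selection performed below is what puts this stream into incremental mode: the empty breadcrumb targets table-level metadata, and the replication-method entry overrides the connection-level default_replication_method of LOG_BASED set in get_properties above. For reference, the shape of that metadata patch (the same literal used below, annotated):

    additional_md = [{
        "breadcrumb": [],                          # [] addresses the stream itself, not a field
        "metadata": {
            "replication-method": "INCREMENTAL",   # overrides the LOG_BASED connection default
            "replication-key": "OUR TS TZ",        # mixed-case column used for bookmarking
        },
    }]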
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(table_version, bookmark['version'])
+        self.assertEqual(expected_replication_key, bookmark['replication_key'])
+        self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
+
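The replication_key_value asserted above is the UTC-normalized string produced by expected_ts_tz. Because the fixture timestamps are localized with pytz's America/New_York zone, the helper picks up the correct historical offset on either side of a DST boundary, so the expectations stay stable regardless of the time of year. A quick standalone illustration (the second value is the replication-key update applied to record 1 before the next sync):

    import datetime
    import pytz

    nyc_tz = pytz.timezone('America/New_York')

    # winter timestamp (EST, UTC-5): record 3's replication-key value
    winter = nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))
    print(winter.astimezone(pytz.utc).strftime("%Y-%m-%dT%H:%M:%S.%f+00:00"))
    # 1997-02-02T07:02:02.722184+00:00

    # DST timestamp (EDT, UTC-4): the value record 1 is updated to below
    summer = nyc_tz.localize(datetime.datetime(2021, 4, 4, 4, 4, 4, 733184))
    print(summer.astimezone(pytz.utc).strftime("%Y-%m-%dT%H:%M:%S.%f+00:00"))
    # 2021-04-04T08:04:04.733184+00:00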
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
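The assertions that follow hinge on the inclusive bookmark filter: any row whose "OUR TS TZ" is greater than or equal to the saved replication_key_value is re-selected and emitted in ascending replication-key order. Working that through with the values set up above (illustrative only, not tap code):

    import datetime
    import pytz

    nyc_tz = pytz.timezone('America/New_York')
    # record 3's "OUR TS TZ", bookmarked by sync 1
    bookmark = nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))

    # id -> "OUR TS TZ" after the manipulations above (id 5 was deleted)
    remaining = {
        1: nyc_tz.localize(datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)),     # updated above the bookmark
        2: nyc_tz.localize(datetime.datetime(1990, 4, 4, 4, 4, 4, 733184)),     # updated below the bookmark
        3: bookmark,                                                            # previously bookmarked record
        4: nyc_tz.localize(datetime.datetime(1996, 4, 4, 4, 4, 4, 733184)),     # inserted below the bookmark
        6: nyc_tz.localize(datetime.datetime(2111, 1, 1, 12, 12, 12, 222111)),  # inserted above the bookmark
    }
    synced = sorted((ts, rec_id) for rec_id, ts in remaining.items() if ts >= bookmark)
    print([rec_id for _, rec_id in synced])   # [3, 1, 6] -> expected_records[2], [0], [5]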
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #--------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
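+                # a second round of inserts follows: one more cow and one more chicken, with the
+                # same expected-value bookkeeping (the serial id becomes 3 and _sdc_deleted_at is
+                # None), so the following sync should report two new upserts per stream.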
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
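+        # the update below writes edge-case values: 'NaN' for our_decimal and our_double,
+        # '+Infinity' for our_real, and '$56.811' for our_money; the expected record asserts
+        # that the non-finite numerics surface as None and that money is rounded to '$56.81'.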
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
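+        # The UPDATE above wrote '$56.811' to our_money (the money type rounds it to
+        # '$56.81') and 'NaN' / '+Infinity' to the numeric columns, so the expected
+        # record below carries null for our_decimal, our_double and our_real
+        # (JSON cannot represent NaN or Infinity).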
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
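+# Views carry no table-key-properties in discovery (asserted in the metadata check above),
+# so 'id' is supplied through view-key-properties when the stream is selected; that is the
+# key expected_pks() reports for the sync verification.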
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
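+                # updated_at on the base table is exposed through the view created below
+                # and is later selected as the incremental replication key for chicken_view.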
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
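+ # per the note above, a literal '24:00:00' input comes back as 00:00:00,
+ # so 23:59:59.999999 is used as the practical maximum for the time columns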
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
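+ # The CHAR(10485760)/TEXT fixtures are huge, so a whole-record assertDictEqual
+ # would be unreadable; each expected field is checked in its own subTest instead.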
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
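+ # network and extension types are expected verbatim in sql-datatype metadata;
+ # the serial pseudo-types surface as their underlying integer types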
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
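+ # Per the note above, only the primary key ('id') should carry inclusion
+ # 'automatic'; replication keys are not promoted by this tap's discovery.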
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
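+        # The discovered tap_stream_id values appear to follow a '<dbname>-<schema>-<table>'
+        # pattern, which is why expected_check_streams() is keyed
+        # 'dev-public-postgres_full_table_replication_array_test'. As a small illustrative
+        # aid (the naming convention is inferred from the expected stream ids used in these
+        # tests, not from tap documentation), the discovered ids can be printed before
+        # asserting on them:
+        print("discovered tap_stream_ids: {}".format(
+            sorted(c['tap_stream_id'] for c in found_catalogs)))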
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_1 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_1, bookmark['version'])
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN and get the same 3 records
+        #----------------------------------------------------------------------
+
+        # run sync job 2 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_2 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('activate_version', messages[3]['action'])
+
+        # verify the new table version increased on the second sync
+        self.assertGreater(table_version_2, table_version_1)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_2, bookmark['version'])
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN following various manipulations to the data
+        #----------------------------------------------------------------------
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # NB | We will perform the following actions prior to the next sync:
+                #      [Action (EXPECTED RESULT)]
+
+                #      Insert a record
+                #      Insert a record to be updated prior to sync
+                #      Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+                #      Update an existing record
+                #      Update a newly inserted record
+
+                #      Delete an existing record
+                #      Delete a newly inserted record
+
+                # inserting...
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+                # an existing record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 1
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[0]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+                # a newly inserted record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 5
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[4]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+                # deleting
+                # an existing record
+                record_pk = 2
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+                # a newly inserted record
+                record_pk = 6
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN after various manipulations
+        #----------------------------------------------------------------------
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_3 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(4, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the new table version increased on the third sync
+        self.assertGreater(table_version_3, table_version_2)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+        # NB | This is a little tough to track mentally so here's a breakdown of
+        #      the order of operations by expected records indexes:
+
+        #      Prior to Sync 1
+        #          insert 0, 1, 2
+
+        #      Prior to Sync 2
+        #          No db changes
+
+        #      Prior to Sync 3
+        #          insert 3, 4, 5
+        #          update 0, 4
+        #          delete 1, 5
+
+        #      Resulting Synced Records: 2, 3, 0, 4
+
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+        self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(table_version, bookmark['version'])
+        self.assertEqual(expected_replication_key, bookmark['replication_key'])
+        self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
+
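+        # For orientation before the data manipulations below: based on the assertions above,
+        # the bookmark written by sync 1 should look roughly like this sketch (field names
+        # taken from the assertions; the exact shape is owned by the tap):
+        #
+        #   {'currently_syncing': None,
+        #    'bookmarks': {'dev-public-postgres_incremental_replication_test': {
+        #        'version': <table_version>,
+        #        'replication_key': 'OUR TS TZ',
+        #        'replication_key_value': '1997-02-02T07:02:02.722184+00:00'}}}
+        #
+        # Only rows whose "OUR TS TZ" is at or above that value are expected to be
+        # picked up by the next sync.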
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
+
+        # verify the first record was the bookmarked record from the previous sync
+        self.assertDictEqual(self.expected_records[2], messages[1]['data'])
+
+        # verify the expected updated record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[0], messages[2]['data'])
+
+        # verify the expected inserted record with a lower replication-key value was NOT replicated
+        actual_record_ids = [message['data']['id'] for message in messages[1:]]
+        expected_record_id = self.expected_records[3]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the record that was inserted and then deleted (higher replication-key value) was NOT replicated
+        expected_record_id = self.expected_records[4]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected updated record with a lower replication-key value was NOT replicated
+        expected_record_id = self.expected_records[1]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected inserted record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[5], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # get bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify the bookmarked state matches our expectations
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(bookmark['version'], table_version)
+        self.assertEqual(bookmark['replication_key'], expected_replication_key)
+        self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key])
+
+        #----------------------------------------------------------------------
+        # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
+        #----------------------------------------------------------------------
+
+        # Delete a pre-existing record from the database
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # delete a record with a lower replication key than the previous sync
+                record_pk = 1
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(1, record_count_by_stream[test_table_name])
+
+        # verify messages match our expectations
+        self.assertEqual(2, len(messages))
+        self.assertEqual(messages[0]['action'], 'activate_version')
+        self.assertEqual(messages[1]['action'], 'upsert')
+        self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version)
+
+        # verify replicated records meet our expectations... 
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
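Every sync in these logical replication scenarios closes with the same bookmark verification: currently_syncing must be cleared, the LSN recorded in state must be present and must never move backwards, and the table version established by the initial sync must not change. A minimal sketch of that repeated pattern as a helper (the helper name and signature are illustrative only, not part of the tap-tester API):

def assert_lsn_bookmark(test, state, stream_id, prev_lsn, expected_version):
    # currently_syncing should be cleared once the sync job has finished
    test.assertIsNone(state['currently_syncing'])
    bookmark = state['bookmarks'][stream_id]
    # the LSN bookmark must exist and may only move forward between syncs
    test.assertIsNotNone(bookmark['lsn'])
    test.assertGreaterEqual(bookmark['lsn'], prev_lsn)
    # the table version is set by the initial sync and stays fixed afterwards
    test.assertEqual(bookmark['version'], expected_version)
    return bookmark['lsn']

In place of the inline assertions it would read, for example:

lsn_6 = assert_lsn_bookmark(self, menagerie.get_state(conn_id),
                            'dev-public-postgres_logical_replication_test',
                            lsn_5, table_version)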
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
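Under log-based replication, the source-side DELETE issued below is expected to surface as an 'upsert' message whose data carries a populated _sdc_deleted_at value, exactly as the format-1 scenario above asserts. A small sketch of that check, with an illustrative helper name:

def assert_deleted_record(test, message, expected_id):
    # deletes are not dropped; they arrive as upserts flagged with _sdc_deleted_at
    test.assertEqual(message['action'], 'upsert')
    test.assertIsNotNone(message['data'].get('_sdc_deleted_at'))
    test.assertEqual(message['data']['id'], expected_id)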
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
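The single upsert verified below relies on the value handling this test exercises for the UPDATE statement above. A short summary of those expectations, mirroring the expected_updated_rec used in both logical replication scenarios rather than documenting general tap behaviour:

UPDATE_EXPECTATIONS = {
    'our_decimal': None,                              # NUMERIC 'NaN' replicates as null
    'our_real': None,                                 # REAL '+Infinity' replicates as null
    'our_double': None,                               # DOUBLE PRECISION 'NaN' replicates as null
    'our_money': '$56.81',                            # money '$56.811' arrives rounded, as a string
    'our_bit': False,                                 # BIT(1) '0' maps to boolean False
    'our_store': {'name': 'betty', 'size': 'small'},  # HSTORE values arrive as a dict
}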
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
+
+        # verify that persisted streams have the correct properties
+        chicken_catalog = found_catalogs[0]
+
+        self.assertEqual('chicken_view', chicken_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        print('checking discovered metadata for ROOT-CHICKEN_VIEW')
+        md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata']
+
+        self.assertEqual(
+            {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []},
+             ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True},
+             ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
+             ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True},
+             ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
+             ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}},
+            metadata.to_map(md))
+
+        # 'id' selected as view-key-properties
+        replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}]
+
+        connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog,
+                                                            menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']),
+                                                            replication_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+
+        # verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        record_count_by_stream = runner.examine_target_output_file(self,
+                                                                   conn_id,
+                                                                   self.expected_sync_streams(),
+                                                                   self.expected_pks())
+
+        self.assertEqual(record_count_by_stream, { 'chicken_view': 1})
+        records_by_stream = runner.get_records_from_target_output()
+
+        table_version = records_by_stream['chicken_view']['table_version']
+        self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version')
+        self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version')
+
+        # verifications about individual records
+        for stream, recs in records_by_stream.items():
+            # verify the persisted schema was correct
+            self.assertEqual(recs['schema'],
+                             expected_schemas[stream],
+                             msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))
+
+        actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data']
+
+        expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'}
+        self.assertEqual(actual_chicken_record,
+                         expected_chicken_record,
+                         msg="Expected `chicken_view` upsert record data to be {}, but target output was {}".format(expected_chicken_record, actual_chicken_record))
+
+        print("records are correct")
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+
+        chicken_bookmark = state['bookmarks']['postgres-public-chicken_view']
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+        self.assertEqual(chicken_bookmark['version'], table_version,
+                         msg="expected bookmark for stream postgres-public-chicken_view to match version")
+
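+# NB | The message-ordering assertions above rely on the Singer full-table pattern: a sync of
+#      a single-row view is expected to emit something like
+#
+#          {'action': 'activate_version'}
+#          {'action': 'upsert', 'data': {...}}
+#          {'action': 'activate_version'}
+#
+#      i.e. the upserts are bracketed by activate_version messages so the target can version
+#      the table and expire rows that belonged to an earlier version.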
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
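+                # unlike the full-table variant above, table 1 carries an updated_at column
+                # here so the joined view exposes a usable replication key for the
+                # INCREMENTAL selection made later in test_run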
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
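+        # NB | For illustration only: the per-field loops below check the same thing as
+        #          self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        #          self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        #      would, but subTest reports each mismatched field on its own rather than
+        #      dumping one enormous record diff.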
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+            'our_cidr': 'cidr',
+            'our_inet': 'inet',
+            'our_mac': 'macaddr',
+            'our_alignment_enum': 'alignment',
+            'our_money': 'money',
+            'invalid_bigserial': 'bigint',
+            'invalid_bit_varying': 'bit varying',
+            'invalid_box': 'box',
+            'invalid_bytea': 'bytea',
+            'invalid_circle': 'circle',
+            'invalid_interval': 'interval',
+            'invalid_line': 'line',
+            'invalid_lseg': 'lseg',
+            'invalid_path': 'path',
+            'invalid_pg_lsn': 'pg_lsn',
+            'invalid_point': 'point',
+            'invalid_polygon': 'polygon',
+            'invalid_serial': 'integer',
+            'invalid_smallserial': 'smallint',
+            'invalid_tsquery': 'tsquery',
+            'invalid_tsvector': 'tsvector',
+            'invalid_txid_snapshot': 'txid_snapshot',
+            'invalid_xml': 'xml',
+        }
+
+    @staticmethod
+    def tap_name():
+        return "tap-postgres"
+
+    @staticmethod
+    def name():
+        return "tap_tester_postgres_discovery"
+
+    @staticmethod
+    def get_type():
+        return "platform.postgres"
+
+    @staticmethod
+    def get_credentials():
+        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
+
+    def get_properties(self, original_properties=True):
+        return_value = {
+            'host' : os.getenv('TAP_POSTGRES_HOST'),
+            'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
+            'port' : os.getenv('TAP_POSTGRES_PORT'),
+            'user' : os.getenv('TAP_POSTGRES_USER'),
+            'default_replication_method' : self.FULL_TABLE,
+            'filter_dbs' : 'discovery1'
+        }
+        if not original_properties:
+            if self.default_replication_method is self.LOG_BASED:
+                return_value['wal2json_message_format'] = '1'
+
+            return_value['default_replication_method'] = self.default_replication_method
+
+        return return_value
+
+    def test_run(self):
+        """Parametrized discovery test running against each replication method."""
+
+        self.default_replication_method = self.FULL_TABLE
+        full_table_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(full_table_conn_id)
+
+        self.default_replication_method = self.INCREMENTAL
+        incremental_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(incremental_conn_id)
+
+        # NB | We are able to generate a connection and run discovery with a default replication
+        #      method of logical replication WITHOUT selecting a replication slot. This is not
+        #      ideal behavior. This BUG should not be carried over into hp-postgres, but will not
+        #      be fixed for this tap.
+        self.default_replication_method = self.LOG_BASED
+        log_based_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(log_based_conn_id)
+
+    def discovery_test(self, conn_id):
+        """
+        Basic Discovery Test for a database tap.
+
+        Test Description:
+          Ensure discovery runs without error exit codes and generates a catalog of the expected form.
+
+        Test Cases:
+        - Verify discovery generated the expected catalogs by name.
+        - Verify that the tap_stream_id is in the expected <database>-<schema>-<table> format
+          for each stream.
+        - Verify the catalog is found for a given stream.
+        - Verify there is only 1 top level breadcrumb in metadata for a given stream.
+        - Verify replication key(s) match expectations for a given stream.
+        - Verify primary key(s) match expectations for a given stream.
+        - Verify the replication method matches our expectations for a given stream.
+        - Verify that only primary keys are given the inclusion of automatic in metadata
+          for a given stream.
+        - Verify expected unsupported fields are given the inclusion of unsupported in
+          metadata for a given stream.
+        - Verify that all fields for a given stream which are not unsupported or automatic
+          have inclusion of available.
+        - Verify row-count metadata matches expectations for a given stream.
+        - Verify selected metadata is None for all streams.
+        - Verify is-view metadata is False for a given stream.
+        - Verify no forced-replication-method is present in metadata for a given stream.
+        - Verify schema and db match expectations for a given stream.
+        - Verify schema types match expectations for a given stream.
+        """
+        # TODO Generate multiple tables (streams) and maybe dbs too?
+
+        # run discovery (check mode)
+        check_job_name = runner.run_check_mode(self, conn_id)
+
+        # Verify check exit codes
+        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
+        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # Verify discovery generated a catalog
+        found_catalogs = menagerie.get_catalogs(conn_id)
+        self.assertGreater(len(found_catalogs), 0)
+
+        # Verify discovery generated the expected catalogs by name
+        found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # Verify the tap_stream_id is in the expected <database>-<schema>-<table> format for each stream
+        found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids)
+
+        # Test by stream
+        for stream in self.expected_check_streams():
+            with self.subTest(stream=stream):
+
+                # Verify the catalog is found for a given stream
+                catalog = next(iter([catalog for catalog in found_catalogs
+                                     if catalog["stream_name"] == stream]))
+                self.assertTrue(isinstance(catalog, dict))
+
+                # collecting expected values
+                expected_primary_keys = self.expected_primary_keys()[stream]
+                expected_replication_keys = set()
+                expected_unsupported_fields = self.expected_unsupported_fields()
+                expected_fields_to_datatypes = self.expected_schema_types()
+                expected_row_count = len(self.recs)
+
+                # collecting actual values...
+                schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id'])
+                stream_metadata = schema_and_metadata["metadata"]
+                top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []]
+                stream_properties = top_level_metadata[0]['metadata']
+                actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, []))
+                actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, []))
+                actual_replication_method = stream_properties.get(self.REPLICATION_METHOD)
+                actual_automatic_fields = set(
+                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
+                    if item.get("metadata").get("inclusion") == "automatic"
+                )
+                actual_unsupported_fields = set(
+                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
+                    if item.get("metadata").get("inclusion") == "unsupported"
+                )
+                actual_fields_to_datatypes = {
+                    item['breadcrumb'][1]: item['metadata'].get('sql-datatype')
+                    for item in stream_metadata[1:]
+                }
+
+                # Verify there is only 1 top level breadcrumb in metadata
+                self.assertEqual(1, len(top_level_metadata))
+
+                # Verify replication key(s) match expectations
+                self.assertSetEqual(
+                    expected_replication_keys, actual_replication_keys
+                )
+
+                # NB | We expect primary keys and replication keys to have inclusion automatic for
+                #      key-based incremental replication. But that is only true for primary keys here.
+                #      This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
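+                # NB | For illustration (values are examples, not assertions), discovery
+                #      metadata entries take roughly this shape:
+                #          {'breadcrumb': [],
+                #           'metadata': {'table-key-properties': ['id'], 'schema-name': 'public',
+                #                        'database-name': 'discovery1', 'row-count': 500, ...}}
+                #          {'breadcrumb': ['properties', 'id'],
+                #           'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer'}}
+                #          {'breadcrumb': ['properties', 'invalid_xml'],
+                #           'metadata': {'inclusion': 'unsupported', 'sql-datatype': 'xml'}}
+                #      The remaining assertions key off 'inclusion' and these top-level properties.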
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
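+                # NB | For reference (db_utils itself is not shown in this patch), the helpers
+                #      used for these manipulations are called as:
+                #          db_utils.insert_record(cur, test_table_name, record)
+                #          db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+                #          db_utils.delete_record(cur, canon_table_name, record_pk)
+                #      so updates and deletes are keyed off the 'id' primary key.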
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
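+ # NB | db_utils.update_record(cur, table, pk, data) is the repo's helper; it is
+ #      assumed here to issue roughly "UPDATE <table> SET <col> = %s, ... WHERE id = <pk>"
+ #      for the supplied columns. The expectations below rely only on that behaviour.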
+ # an existing record
+ canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+ record_pk = 1
+ our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+ our_ts_tz = nyc_tz.localize(our_ts)
+ updated_data = {
+ "OUR TS TZ": our_ts_tz,
+ "our_double": decimal.Decimal("6.6"),
+ "our_money": "$0.00"
+ }
+ self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+ self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+ self.expected_records[0]["our_money"] = "$0.00"
+
+ db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+ # a newly inserted record
+ canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+ record_pk = 5
+ our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+ our_ts_tz = nyc_tz.localize(our_ts)
+ updated_data = {
+ "OUR TS TZ": our_ts_tz,
+ "our_double": decimal.Decimal("6.6"),
+ "our_money": "$0.00"
+ }
+ self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+ self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+ self.expected_records[4]["our_money"] = "$0.00"
+
+ db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+ # deleting
+ # an existing record
+ record_pk = 2
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ # a newly inserted record
+ record_pk = 6
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ #----------------------------------------------------------------------
+ # invoke the sync job AGAIN after various manipulations
+ #----------------------------------------------------------------------
+
+ # run sync job 3 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_pks()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version_3 = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(4, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(5, len(messages))
+ self.assertEqual('upsert', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+ self.assertEqual('activate_version', messages[4]['action'])
+
+ # verify the new table version increased on the third sync
+ self.assertGreater(table_version_3, table_version_2)
+
+ # verify the persisted schema still matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+ # NB | This is a little tough to track mentally so here's a breakdown of
+ # the order of operations by expected records indexes:
+
+ # Prior to Sync 1
+ # insert 0, 1, 2
+
+ # Prior to Sync 2
+ # No db changes
+
+ # Prior to Sync 3
+ # insert 3, 4, 5
+ # update 0, 4
+ # delete 1, 5
+
+ # Resulting Synced Records: 2, 3, 0, 4
+
+
+ # verify replicated records still match expectations
+ self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+ self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+ 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
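+ # NB | Rough sketch (an assumption, not captured menagerie output) of the catalog
+ #      entries used below; each discovered catalog is a dict exposing at least:
+ #          {'tap_stream_id': 'dev-public-postgres_incremental_replication_test',
+ #           'stream_name': 'postgres_incremental_replication_test',
+ #           'stream_id': <menagerie-assigned id>}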
+
+ # verify discovery produced (at least) 1 expected catalog
+ found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+ if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+ self.assertGreaterEqual(len(found_catalogs), 1)
+
+ # verify the tap discovered the expected streams
+ found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+ self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+ # verify that persisted streams have the correct properties
+ test_catalog = found_catalogs[0]
+ self.assertEqual(test_table_name, test_catalog['stream_name'])
+ print("discovered streams are correct")
+
+ # perform table selection
+ print('selecting {} and all fields within the table'.format(test_table_name))
+ schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+ additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+ _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+ # clear state
+ menagerie.set_state(conn_id, {})
+
+ # run sync job 1 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(3, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(4, len(messages))
+ self.assertEqual('activate_version', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+
+ # verify the persisted schema matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+ # verify replicated records match expectations
+ self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+ self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+ self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+ # verify records are in ascending order by replication-key value
+ expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+ self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+ self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+ print("records are correct")
+
+ # grab bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+ # verify state and bookmarks meet expectations
+ self.assertIsNone(state['currently_syncing'])
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertEqual(table_version, bookmark['version'])
+ self.assertEqual(expected_replication_key, bookmark['replication_key'])
+ self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
+
+ 
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
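+ # NB | Sketch of the expected layout for sync 2, derived from the assertions below
+ #      (messages are in ascending replication-key order):
+ #          messages[1] -> expected_records[2]  (the record bookmarked by sync 1)
+ #          messages[2] -> expected_records[0]  (updated to a higher replication-key value)
+ #          messages[3] -> expected_records[5]  (inserted with a higher replication-key value)
+ #      expected_records[3] (lower-value insert), [4] (deleted) and [1] (lower-value
+ #      update) should NOT appear.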
+
+ # verify the first record was the bookmarked record from the previous sync
+ self.assertDictEqual(self.expected_records[2], messages[1]['data'])
+
+ # verify the expected updated record with a higher replication-key value was replicated
+ self.assertDictEqual(self.expected_records[0], messages[2]['data'])
+
+ # verify the expected inserted record with a lower replication-key value was NOT replicated
+ actual_record_ids = [message['data']['id'] for message in messages[1:]]
+ expected_record_id = self.expected_records[3]['id']
+ self.assertNotIn(expected_record_id, actual_record_ids)
+
+ # verify the deleted record with a higher replication-key value was NOT replicated
+ expected_record_id = self.expected_records[4]['id']
+ self.assertNotIn(expected_record_id, actual_record_ids)
+
+ # verify the expected updated record with a lower replication-key value was NOT replicated
+ expected_record_id = self.expected_records[1]['id']
+ self.assertNotIn(expected_record_id, actual_record_ids)
+
+ # verify the expected inserted record with a higher replication-key value was replicated
+ self.assertDictEqual(self.expected_records[5], messages[3]['data'])
+
+ # verify records are in ascending order by replication-key value
+ self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+ self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+ print("records are correct")
+
+ # get bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+ # verify the bookmarked state matches our expectations
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertEqual(bookmark['version'], table_version)
+ self.assertEqual(bookmark['replication_key'], expected_replication_key)
+ self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key])
+
+ #----------------------------------------------------------------------
+ # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
+ #----------------------------------------------------------------------
+
+ # Delete a pre-existing record from the database
+ with db_utils.get_test_connection('dev') as conn:
+ conn.autocommit = True
+ with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+ # delete a record with a lower replication key than the previous sync
+ record_pk = 1
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ # run sync job 3 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(1, record_count_by_stream[test_table_name])
+
+ # verify messages match our expectations
+ self.assertEqual(2, len(messages))
+ self.assertEqual(messages[0]['action'], 'activate_version')
+ self.assertEqual(messages[1]['action'], 'upsert')
+ self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version)
+
+ # verify replicated records meet our expectations...
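+ # NB | Sketch of the expectation for sync 3: the deleted row (id 1) sits below the
+ #      sync 2 bookmark, so only the previously bookmarked record should be emitted,
+ #      i.e. messages[1] -> expected_records[5].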
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+
+        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
+        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_cows_1 = bookmark_cows['lsn']
+        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
+
+        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
+        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_chickens_1 = bookmark_chickens['lsn']
+        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job again after adding records
+        #----------------------------------------------------------------------
+        print("inserting 2 more cows and 2 more chickens")
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor() as cur:
+                # insert another cow
+                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
+                insert_record(cur, test_table_name_cows, self.cows_rec_2)
+                # update that cow's expected values
+                self.cows_rec_2['id'] = 2
+                self.cows_rec_2['_sdc_deleted_at'] = None
+
+                # insert another chicken
+                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
+                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
+                # update that chicken's expected values
+                self.chicken_rec_2['id'] = 2
+                self.chicken_rec_2['_sdc_deleted_at'] = None
+
+                # and repeat...
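+                # a third cow and chicken per table, so the second sync should emit two upserts per stream (asserted below)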
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
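                # check for a leftover 'stitch' slot from a previous run before creating a fresh wal2json slot +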
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
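        # note on the update step that follows: money '$56.811' rounds to '$56.81', and the 'NaN' / '+Infinity' numeric inputs are expected to come back as None in the replicated record +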
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
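When a sync step does not emit the expected records, the changes still queued on the replication slot can be inspected between syncs. A minimal debugging sketch (not part of the test flow), assuming the same `db_utils.get_test_connection` helper and the `stitch` slot created in `setUp` with the wal2json plugin; `pg_logical_slot_peek_changes` reads the slot without advancing it:

```python
# Peek at pending logical changes on the 'stitch' slot without consuming them.
with db_utils.get_test_connection('dev') as conn:
    with conn.cursor() as cur:
        cur.execute("SELECT data FROM pg_logical_slot_peek_changes('stitch', NULL, NULL)")
        for (change,) in cur.fetchall():
            print(change)  # one wal2json JSON document per queued change
```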
+        #----------------------------------------------------------------------
+        # invoke the sync job again after deleting a record
+        #----------------------------------------------------------------------
+        print("delete row from source db")
+        with db_utils.get_test_connection('dev') as conn:
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+                cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur)))
+
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+
+        # verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        record_count_by_stream = runner.examine_target_output_file(self,
+                                                                   conn_id,
+                                                                   self.expected_sync_streams(),
+                                                                   self.expected_pks())
+
+        self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 })
+        records_by_stream = runner.get_records_from_target_output()
+
+        for stream, recs in records_by_stream.items():
+            # verify the persisted schema was correct
+            self.assertEqual(recs['schema'],
+                             expected_schemas[stream],
+                             msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))
+
+        # the message will be the delete
+        delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0]
+        self.assertEqual(delete_message['action'], 'upsert')
+
+        sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at')
+        self.assertIsNotNone(sdc_deleted_at)
+        self.assertEqual(delete_message['data']['id'], 3)
+        print("deleted record is correct")
+
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test']
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+
+        self.assertIsNotNone(bookmark['lsn'],
+                             msg="expected bookmark for stream public-postgres_logical_replication_test to have an lsn")
+
+        lsn_3 = bookmark['lsn']
+        self.assertTrue(lsn_3 >= lsn_2)
+
+        # table_version does NOT change
+        self.assertEqual(bookmark['version'], table_version,
+                         msg="expected bookmark for stream postgres_logical_replication_test to match version")
+        #----------------------------------------------------------------------
+        # invoke the sync job again after updating a record
+        #----------------------------------------------------------------------
+        print("updating row from source db")
+        with db_utils.get_test_connection('dev') as conn:
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+                cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur)))
+
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        # verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        record_count_by_stream = runner.examine_target_output_file(self,
+                                                                   conn_id,
+                                                                   self.expected_sync_streams(),
+                                                                   self.expected_pks())
+
+        self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 })
+        records_by_stream = runner.get_records_from_target_output()
+        for stream, recs in records_by_stream.items():
+            # verify the persisted schema was correct
+            self.assertEqual(recs['schema'],
+                             expected_schemas[stream],
+                             msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))
+
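The UPDATE above deliberately writes values that cannot round-trip through Singer's JSON output: `NaN` and `+Infinity` are legal for PostgreSQL's numeric and float types but have no representation in standards-compliant JSON, which is consistent with the expected record below carrying `None` for `our_decimal`, `our_real`, and `our_double`; the `money` column keeps two fractional digits, so `'$56.811'` comes back as `'$56.81'`. A small illustration of the JSON constraint:

```python
import json

# Strict (RFC-compliant) JSON encoding rejects NaN and Infinity outright,
# so a tap has to map such values to null before emitting the record.
try:
    json.dumps(float("nan"), allow_nan=False)
except ValueError as err:
    print(err)  # "Out of range float values are not JSON compliant"
```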
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
+        self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
+
+        # verify that persisted streams have the correct properties
+        chicken_catalog = found_catalogs[0]
+
+        self.assertEqual('chicken_view', chicken_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        print('checking discovered metadata for chicken_view')
+        md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata']
+
+        self.assertEqual(
+            {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []},
+             ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True},
+             ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
+             ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True},
+             ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
+             ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}},
+            metadata.to_map(md))
+
+
+        # 'id' selected as view-key-properties
+        replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}]
+
+        connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog,
+                                                           menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']),
+                                                           replication_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+
+        # verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        record_count_by_stream = runner.examine_target_output_file(self,
+                                                                   conn_id,
+                                                                   self.expected_sync_streams(),
+                                                                   self.expected_pks())
+
+
+        self.assertEqual(record_count_by_stream, { 'chicken_view': 1})
+        records_by_stream = runner.get_records_from_target_output()
+
+        table_version = records_by_stream['chicken_view']['table_version']
+        self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version')
+        self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version')
+
+        # verifications about individual records
+        for stream, recs in records_by_stream.items():
+            # verify the persisted schema was correct
+            self.assertEqual(recs['schema'],
+                             expected_schemas[stream],
+                             msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))
+
+        actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data']
+
+        expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'}
+        self.assertEqual(actual_chicken_record,
+                         expected_chicken_record,
+                         msg="Expected `chicken_view` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record))
+
+        print("records are correct")
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+
+        chicken_bookmark = state['bookmarks']['postgres-public-chicken_view']
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+        self.assertEqual(chicken_bookmark['version'], table_version,
+                         msg="expected bookmark for stream chicken_view to match version")
+
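One note on the `view-key-properties` metadata used above: PostgreSQL views carry no primary-key constraint of their own, so discovery reports `table-key-properties: []` (as asserted in the metadata check) and the test has to name the key column explicitly. A quick way to confirm this against the test database, reusing the helpers defined above:

```python
# Views have no PRIMARY KEY constraint, which is why discovery cannot infer key
# properties for chicken_view and 'view-key-properties': ["id"] is set by hand.
with db_utils.get_test_connection() as conn:
    with conn.cursor() as cur:
        cur.execute("""SELECT constraint_name
                         FROM information_schema.table_constraints
                        WHERE table_name = %s
                          AND constraint_type = 'PRIMARY KEY'""", ['chicken_view'])
        print(cur.fetchall())  # expected: []
```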
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
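The `updated_at TIMESTAMP WITH TIME ZONE` column created above is the column this test later selects as the view's replication key (via `replication_md`), so subsequent incremental syncs are expected to bookmark on it. A hedged sketch of the kind of filtered query an incremental sync implies; the exact SQL is the tap's concern, not the test's:

```python
# Hypothetical illustration of incremental filtering on the replication key.
bookmark = '2111-01-01T12:12:12.222111+00:00'   # replication_key_value persisted in state
incremental_query = (
    'SELECT * FROM "public"."chicken_view" '
    'WHERE updated_at >= %s ORDER BY updated_at ASC'
)
# cur.execute(incremental_query, [bookmark])
```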
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
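The `our_decimal` entry in `expected_schemas` above derives its JSON-schema bounds from the column's NUMERIC precision and scale. A worked example with the constants used in this test (NUMERIC_PRECISION=12, NUMERIC_SCALE=2):

```python
import decimal

NUMERIC_PRECISION = 12
NUMERIC_SCALE = 2

multiple_of = decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE)))   # Decimal('0.01')
maximum = 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE)             # 10000000000
minimum = -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE)            # -10000000000
```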
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOs + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default to 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datetimes + TODOs + - Test values with second, millisecond and microsecond precision + + Boolean + TODOs + - Enter all acceptable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, +
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 with minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO This throws a psycopg2 error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, #
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without errors and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the expected format for each stream. + - Verify the catalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available.
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after various manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the third sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
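+        # NB | Illustrative sketch only (an assumption added for readability, not the tap's actual query):
+        #      with replication-method INCREMENTAL and replication-key "OUR TS TZ", the next sync is
+        #      expected to behave roughly like
+        #          SELECT * FROM "public"."postgres_incremental_replication_test"
+        #           WHERE "OUR TS TZ" >= <bookmarked replication_key_value>
+        #           ORDER BY "OUR TS TZ" ASC;
+        #      The inclusive comparison is why the bookmarked record reappears in the next sync below,
+        #      while rows whose replication-key value is lower than the bookmark are not replicated.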
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
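+
+        # NB | The assertions below rely on the tap's bookmark comparison for key-based
+        #      INCREMENTAL replication appearing to be inclusive (>=): the record whose
+        #      replication-key value equals the saved bookmark is re-emitted on the next
+        #      sync, so we expect the previously bookmarked record plus the insert and
+        #      update with greater replication-key values, in ascending key order.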
+
+        # verify the first record was the bookmarked record from the previous sync
+        self.assertDictEqual(self.expected_records[2], messages[1]['data'])
+
+        # verify the expected updated record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[0], messages[2]['data'])
+
+        # verify the expected inserted record with a lower replication-key value was NOT replicated
+        actual_record_ids = [message['data']['id'] for message in messages[1:]]
+        expected_record_id = self.expected_records[3]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the deleted record with a higher replication-key value was NOT replicated
+        expected_record_id = self.expected_records[4]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected updated record with a lower replication-key value was NOT replicated
+        expected_record_id = self.expected_records[1]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected inserted record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[5], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # get bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify the bookmarked state matches our expectations
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(bookmark['version'], table_version)
+        self.assertEqual(bookmark['replication_key'], expected_replication_key)
+        self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key])
+
+        #----------------------------------------------------------------------
+        # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
+        #----------------------------------------------------------------------
+
+        # Delete a pre-existing record from the database
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # delete a record with a lower replication key than the previous sync
+                record_pk = 1
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(1, record_count_by_stream[test_table_name])
+
+        # verify messages match our expectations
+        self.assertEqual(2, len(messages))
+        self.assertEqual(messages[0]['action'], 'activate_version')
+        self.assertEqual(messages[1]['action'], 'upsert')
+        self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version)
+
+        # verify replicated records meet our expectations...
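+
+        # NB | INCREMENTAL replication has no visibility into hard deletes: the deleted
+        #      row simply stops matching the incremental SELECT, nothing is emitted for
+        #      it, and the bookmark is left untouched. That is why this sync should
+        #      return only the record at the previously saved bookmark value.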
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
+                         'activate_version')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+
+        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
+        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_cows_1 = bookmark_cows['lsn']
+        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
+
+        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
+        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_chickens_1 = bookmark_chickens['lsn']
+        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job again after adding records
+        #----------------------------------------------------------------------
+        print("inserting 2 more cows and 2 more chickens")
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor() as cur:
+                # insert another cow
+                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
+                insert_record(cur, test_table_name_cows, self.cows_rec_2)
+                # update that cow's expected values
+                self.cows_rec_2['id'] = 2
+                self.cows_rec_2['_sdc_deleted_at'] = None
+
+                # insert another chicken
+                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
+                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
+                # update that chicken's expected values
+                self.chicken_rec_2['id'] = 2
+                self.chicken_rec_2['_sdc_deleted_at'] = None
+
+                # and repeat...
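+
+                # NB | Under LOG_BASED replication the tap adds an _sdc_deleted_at column to
+                #      every record; it stays null for inserts and updates and is only
+                #      populated when a DELETE is read from the WAL, which is why the
+                #      expected records set it to None here.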
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
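[Editorial note, not part of the patch] The update hunk that follows writes edge-case values into row id=1: a money value of '$56.811', and 'NaN' / '+Infinity' into the numeric, real, and double precision columns. The assertions below then expect the money value rounded to two decimal places ('$56.81') and the non-finite numerics replicated as None. A minimal sketch of that expectation, under those assumptions (the helper name is illustrative only):

    # Illustrative only -- mirrors the expectations asserted in the update hunk below.
    # Assumes non-finite float/decimal values are nulled out in the replicated record
    # and that Postgres money keeps two fractional digits.
    import decimal
    import math

    def expected_replicated_value(column, value):
        if column == 'our_money':
            return '$56.81'          # money('$56.811') is stored/emitted with two decimal places
        if isinstance(value, float) and not math.isfinite(value):
            return None              # 'NaN' / '+Infinity' are not representable as JSON numbers
        if isinstance(value, decimal.Decimal) and not value.is_finite():
            return None
        return value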
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
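[Editorial note, not part of the patch] The state checks that follow repeat one invariant after every sync: currently_syncing is cleared, the stream bookmark keeps its table_version, and its LSN never moves backwards. A compact helper expressing that repeated pattern could look like the sketch below (illustrative only; `test` is the unittest.TestCase instance):

    # Illustrative helper -- captures the state/bookmark assertions repeated
    # after each sync in these logical replication tests.
    def assert_bookmark_advanced(test, state, stream_id, prev_lsn, expected_version):
        test.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")
        bookmark = state['bookmarks'][stream_id]
        test.assertIsNotNone(bookmark['lsn'], msg="expected the bookmark to have an lsn")
        test.assertGreaterEqual(bookmark['lsn'], prev_lsn)
        test.assertEqual(bookmark['version'], expected_version,
                         msg="table_version should not change between syncs")
        return bookmark['lsn']

Usage would mirror the inline assertions, e.g. lsn_2 = assert_bookmark_advanced(self, menagerie.get_state(conn_id), 'dev-public-postgres_logical_replication_test', lsn_1, table_version).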
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
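+# A quick sanity-check sketch (illustrative only, not executed by the test): the upsert
+# record asserted above should match what the view returns when queried directly.
+# Everything referenced here (db_utils, psycopg2.extras, quote_ident, test_view and the
+# fixture rows) is already defined/imported in this module; nothing new is assumed.
+#
+#     with db_utils.get_test_connection() as conn:
+#         with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+#             cur.execute("SELECT * FROM {}".format(quote_ident(test_view, cur)))
+#             row = dict(cur.fetchone())
+#             # expected: {'id': 1, 'name': 'fred', 'size': 'big', 'fk_id': 1, 'age': 99}
+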
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
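+                # updated_at (TIMESTAMP WITH TIME ZONE) exists only on this first table; it is
+                # the column test_run later selects as the view's incremental replication key,
+                # and the bookmark assertions expect its value to come back as
+                # '2111-01-01T12:12:12.222111+00:00'.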
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
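+        # Message layout for this two-record, full-table sync (matches the loops below):
+        #   messages[0]  -> activate_version
+        #   messages[1]  -> upsert of the minimum-value record (self.expected_records[0])
+        #   messages[2]  -> upsert of the maximum-value record (self.expected_records[1])
+        #   messages[-1] -> activate_version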
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+            'our_cidr': 'cidr',
+            'our_inet': 'inet',
+            'our_mac': 'macaddr',
+            'our_alignment_enum': 'alignment',
+            'our_money': 'money',
+            'invalid_bigserial': 'bigint',
+            'invalid_bit_varying': 'bit varying',
+            'invalid_box': 'box',
+            'invalid_bytea': 'bytea',
+            'invalid_circle': 'circle',
+            'invalid_interval': 'interval',
+            'invalid_line': 'line',
+            'invalid_lseg': 'lseg',
+            'invalid_path': 'path',
+            'invalid_pg_lsn': 'pg_lsn',
+            'invalid_point': 'point',
+            'invalid_polygon': 'polygon',
+            'invalid_serial': 'integer',
+            'invalid_smallserial': 'smallint',
+            'invalid_tsquery': 'tsquery',
+            'invalid_tsvector': 'tsvector',
+            'invalid_txid_snapshot': 'txid_snapshot',
+            'invalid_xml': 'xml',
+        }
+
+    @staticmethod
+    def tap_name():
+        return "tap-postgres"
+
+    @staticmethod
+    def name():
+        return "tap_tester_postgres_discovery"
+
+    @staticmethod
+    def get_type():
+        return "platform.postgres"
+
+    @staticmethod
+    def get_credentials():
+        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
+
+    def get_properties(self, original_properties=True):
+        return_value = {
+            'host' : os.getenv('TAP_POSTGRES_HOST'),
+            'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
+            'port' : os.getenv('TAP_POSTGRES_PORT'),
+            'user' : os.getenv('TAP_POSTGRES_USER'),
+            'default_replication_method' : self.FULL_TABLE,
+            'filter_dbs' : 'discovery1'
+        }
+        if not original_properties:
+            if self.default_replication_method is self.LOG_BASED:
+                return_value['wal2json_message_format'] = '1'
+
+            return_value['default_replication_method'] = self.default_replication_method
+
+        return return_value
+
+    def test_run(self):
+        """Parametrized discovery test running against each replication method."""
+
+        self.default_replication_method = self.FULL_TABLE
+        full_table_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(full_table_conn_id)
+
+        self.default_replication_method = self.INCREMENTAL
+        incremental_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(incremental_conn_id)
+
+        # NB | We are able to generate a connection and run discovery with a default replication
+        #      method of logical replication WITHOUT selecting a replication slot. This is not
+        #      ideal behavior. This BUG should not be carried over into hp-postgres, but will not
+        #      be fixed for this tap.
+        self.default_replication_method = self.LOG_BASED
+        log_based_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(log_based_conn_id)
+
+    def discovery_test(self, conn_id):
+        """
+        Basic Discovery Test for a database tap.
+
+        Test Description:
+          Ensure discovery runs without error exit codes and generates a catalog of the expected form.
+
+        Test Cases:
+        - Verify discovery generated the expected catalogs by name.
+        - Verify that the tap_stream_id is in the <database>-<schema>-<table> format for each stream.
+        - Verify the catalog is found for a given stream.
+        - Verify there is only 1 top level breadcrumb in metadata for a given stream.
+        - Verify replication key(s) match expectations for a given stream.
+        - Verify primary key(s) match expectations for a given stream.
+        - Verify the replication method matches our expectations for a given stream.
+        - Verify that only primary keys are given the inclusion of automatic in metadata
+          for a given stream.
+        - Verify expected unsupported fields are given the inclusion of unsupported in
+          metadata for a given stream.
+        - Verify that all fields for a given stream which are not unsupported or automatic
+          have inclusion of available.
+        - Verify row-count metadata matches expectations for a given stream.
+        - Verify selected metadata is None for all streams.
+        - Verify is-view metadata is False for a given stream.
+        - Verify no forced-replication-method is present in metadata for a given stream.
+        - Verify schema and db match expectations for a given stream.
+        - Verify schema types match expectations for a given stream.
+        """
+        # TODO Generate multiple tables (streams) and maybe dbs too?
+
+        # run discovery (check mode)
+        check_job_name = runner.run_check_mode(self, conn_id)
+
+        # Verify check exit codes
+        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
+        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # Verify discovery generated a catalog
+        found_catalogs = menagerie.get_catalogs(conn_id)
+        self.assertGreater(len(found_catalogs), 0)
+
+        # Verify discovery generated the expected catalogs by name
+        found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # Verify that the tap_stream_id is in the <database>-<schema>-<table> format for each stream
+        found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids)
+
+        # Test by stream
+        for stream in self.expected_check_streams():
+            with self.subTest(stream=stream):
+
+                # Verify the catalog is found for a given stream
+                catalog = next(iter([catalog for catalog in found_catalogs
+                                     if catalog["stream_name"] == stream]))
+                self.assertTrue(isinstance(catalog, dict))
+
+                # collecting expected values
+                expected_primary_keys = self.expected_primary_keys()[stream]
+                expected_replication_keys = set()
+                expected_unsupported_fields = self.expected_unsupported_fields()
+                expected_fields_to_datatypes = self.expected_schema_types()
+                expected_row_count = len(self.recs)
+
+                # collecting actual values...
+                schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id'])
+                stream_metadata = schema_and_metadata["metadata"]
+                top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []]
+                stream_properties = top_level_metadata[0]['metadata']
+                actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, []))
+                actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, []))
+                actual_replication_method = stream_properties.get(self.REPLICATION_METHOD)
+                actual_automatic_fields = set(
+                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
+                    if item.get("metadata").get("inclusion") == "automatic"
+                )
+                actual_unsupported_fields = set(
+                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
+                    if item.get("metadata").get("inclusion") == "unsupported"
+                )
+                actual_fields_to_datatypes = {
+                    item['breadcrumb'][1]: item['metadata'].get('sql-datatype')
+                    for item in stream_metadata[1:]
+                }
+
+                # Verify there is only 1 top level breadcrumb in metadata
+                self.assertEqual(1, len(top_level_metadata))
+
+                # Verify replication key(s) match expectations
+                self.assertSetEqual(
+                    expected_replication_keys, actual_replication_keys
+                )
+
+                # NB | We expect primary keys and replication keys to have inclusion automatic for
+                #      key-based incremental replication. But that is only true for primary keys here.
+                #      This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
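+                # For reference, a field-level metadata entry returned by discovery is expected
+                # to look roughly like the following (illustrative shape only; exact keys come
+                # from the tap):
+                #   {'breadcrumb': ['properties', 'id'],
+                #    'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer'}}
+                # which is what the breadcrumb/inclusion collection above relies on.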
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_1 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_1, bookmark['version'])
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN and get the same 3 records
+        #----------------------------------------------------------------------
+
+        # run sync job 2 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_2 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('activate_version', messages[3]['action'])
+
+        # verify the new table version increased on the second sync
+        self.assertGreater(table_version_2, table_version_1)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_2, bookmark['version'])
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN following various manipulations to the data
+        #----------------------------------------------------------------------
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # NB | We will perform the following actions prior to the next sync:
+                #      [Action (EXPECTED RESULT)]
+
+                #      Insert a record
+                #      Insert a record to be updated prior to sync
+                #      Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+                #      Update an existing record
+                #      Update a newly inserted record
+
+                #      Delete an existing record
+                #      Delete a newly inserted record
+
+                # inserting...
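+                # NB | db_utils.insert_record (defined in db_utils, not shown in this patch) is
+                #      assumed to build a parameterized INSERT from each record dict, which is
+                #      why the mixed-case columns below are keyed with quote_ident(...).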
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
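+                # NB | db_utils.update_record(cur, table, pk, data) is not shown in this patch;
+                #      it is assumed to issue roughly UPDATE <table> SET ... WHERE id = <pk>.
+                #      The matching expected_records entries are patched below so the test's
+                #      expectations stay in sync with the database changes.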
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after various manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the third sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
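# NB | The incremental bookmark comparison is inclusive: conceptually the next sync + # selects rows WHERE "OUR TS TZ" >= <replication_key_value> ORDER BY "OUR TS TZ" + # (an illustrative query shape, not necessarily the tap's exact SQL), which is why + # the record that set the bookmark is re-emitted by each of the following syncs. + + 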
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a higher replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #---------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that chicken's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat...
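+ # Note: the expected records are the inserted values plus the serial 'id'
+ # assigned by Postgres and '_sdc_deleted_at', the metadata column the tap
+ # adds under log-based replication (None for rows that were not deleted).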
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
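+ # Note: before rebuilding the test table, setUp drops any leftover 'stitch'
+ # replication slot and recreates it with the wal2json output plugin on a
+ # separate replication connection, so WAL from earlier runs is not replayed.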
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
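+ # Note: the expected counts above (2 and 3 rather than 1 and 2) reflect that
+ # each follow-up sync is also expected to re-emit the last change from the
+ # previous sync before picking up the new deletes.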
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
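+        # NB | With log-based replication the tap is not expected to emit a separate "delete"
+        #      action. A DELETE in the source surfaces as one more 'upsert' message in which the
+        #      `_sdc_deleted_at` field is populated, so targets can soft-delete the row. The next
+        #      section asserts exactly that for the row with id = 3.
+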
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
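+        # NB | The UPDATE above intentionally writes 'NaN' / '+Infinity' into the numeric columns
+        #      and an over-precise money value. The expected record below encodes how those values
+        #      come back through the tap: NaN and Infinity have no JSON representation, so
+        #      our_decimal, our_real and our_double are expected as None, and the money value is
+        #      kept by Postgres at two decimal places, so '$56.811' is expected as '$56.81'.
+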
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
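+
+# Minimal illustrative sketch (not used by the test): the metadata assertion in test_run above
+# compares against a dict keyed by breadcrumb tuples because singer's metadata.to_map() keys each
+# annotated-schema entry by tuple(breadcrumb). The helper name below is made up for illustration
+# only and relies on the `from singer import metadata` import at the top of this file.
+def _breadcrumb_map_example():
+    raw_md = [
+        {'breadcrumb': [], 'metadata': {'is-view': True}},
+        {'breadcrumb': ['properties', 'id'], 'metadata': {'inclusion': 'available'}},
+    ]
+    # expected shape: {(): {'is-view': True}, ('properties', 'id'): {'inclusion': 'available'}}
+    return metadata.to_map(raw_md)
+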
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
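+                # (Note: PostgreSQL itself accepts '24:00:00' as a TIME input, but per the
+                # observation above it does not survive the round trip unchanged, so
+                # '23:59:59.999999' is used as the effective maximum value in this record.)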
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
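+        # (For example, the CHAR(10485760) columns above hold multi-megabyte padded strings,
+        # so a single assertDictEqual failure would dump those values; subTest keeps each
+        # field's comparison and failure message separate.)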
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+            'our_cidr': 'cidr',
+            'our_inet': 'inet',
+            'our_mac': 'macaddr',
+            'our_alignment_enum': 'alignment',
+            'our_money': 'money',
+            'invalid_bigserial': 'bigint',
+            'invalid_bit_varying': 'bit varying',
+            'invalid_box': 'box',
+            'invalid_bytea': 'bytea',
+            'invalid_circle': 'circle',
+            'invalid_interval': 'interval',
+            'invalid_line': 'line',
+            'invalid_lseg': 'lseg',
+            'invalid_path': 'path',
+            'invalid_pg_lsn': 'pg_lsn',
+            'invalid_point': 'point',
+            'invalid_polygon': 'polygon',
+            'invalid_serial': 'integer',
+            'invalid_smallserial': 'smallint',
+            'invalid_tsquery': 'tsquery',
+            'invalid_tsvector': 'tsvector',
+            'invalid_txid_snapshot': 'txid_snapshot',
+            'invalid_xml': 'xml',
+        }
+
+    @staticmethod
+    def tap_name():
+        return "tap-postgres"
+
+    @staticmethod
+    def name():
+        return "tap_tester_postgres_discovery"
+
+    @staticmethod
+    def get_type():
+        return "platform.postgres"
+
+    @staticmethod
+    def get_credentials():
+        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
+
+    def get_properties(self, original_properties=True):
+        return_value = {
+            'host' : os.getenv('TAP_POSTGRES_HOST'),
+            'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
+            'port' : os.getenv('TAP_POSTGRES_PORT'),
+            'user' : os.getenv('TAP_POSTGRES_USER'),
+            'default_replication_method' : self.FULL_TABLE,
+            'filter_dbs' : 'discovery1'
+        }
+        if not original_properties:
+            if self.default_replication_method is self.LOG_BASED:
+                return_value['wal2json_message_format'] = '1'
+
+            return_value['default_replication_method'] = self.default_replication_method
+
+        return return_value
+
+    def test_run(self):
+        """Parametrized discovery test running against each replication method."""
+
+        self.default_replication_method = self.FULL_TABLE
+        full_table_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(full_table_conn_id)
+
+        self.default_replication_method = self.INCREMENTAL
+        incremental_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(incremental_conn_id)
+
+        # NB | We are able to generate a connection and run discovery with a default replication
+        #      method of logical replication WITHOUT selecting a replication slot. This is not
+        #      ideal behavior. This BUG should not be carried over into hp-postgres, but will not
+        #      be fixed for this tap.
+        self.default_replication_method = self.LOG_BASED
+        log_based_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(log_based_conn_id)
+
+    def discovery_test(self, conn_id):
+        """
+        Basic Discovery Test for a database tap.
+
+        Test Description:
+          Ensure discovery runs without error exit codes and generates a catalog of the expected form.
+
+        Test Cases:
+        - Verify discovery generated the expected catalogs by name.
+        - Verify that the tap_stream_id is in the <database>-<schema>-<table> format for each stream.
+        - Verify the catalog is found for a given stream.
+        - Verify there is only 1 top level breadcrumb in metadata for a given stream.
+        - Verify replication key(s) match expectations for a given stream.
+        - Verify primary key(s) match expectations for a given stream.
+        - Verify the replication method matches our expectations for a given stream.
+        - Verify that only primary keys are given the inclusion of automatic in metadata
+          for a given stream.
+        - Verify expected unsupported fields are given the inclusion of unsupported in
+          metadata for a given stream.
+        - Verify that all fields for a given stream which are not unsupported or automatic
+          have inclusion of available.
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
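+                # For reference, the discovered metadata parsed above is a list of entries
+                # shaped roughly like the following (abbreviated; values are illustrative):
+                #   {'breadcrumb': [], 'metadata': {'table-key-properties': ['id'],
+                #        'schema-name': 'public', 'database-name': 'discovery1',
+                #        'row-count': 500, 'is-view': False}}
+                #   {'breadcrumb': ['properties', 'id'],
+                #    'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer'}}
+                #   {'breadcrumb': ['properties', 'invalid_xml'],
+                #    'metadata': {'inclusion': 'unsupported', 'sql-datatype': 'xml'}}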
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+ # verify discovery produced (at least) 1 expected catalog
+ found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+ if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+ self.assertGreaterEqual(len(found_catalogs), 1)
+
+ # verify the tap discovered the expected streams
+ found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+ self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+ # verify that persisted streams have the correct properties
+ test_catalog = found_catalogs[0]
+ self.assertEqual(test_table_name, test_catalog['stream_name'])
+ print("discovered streams are correct")
+
+ # perform table selection
+ print('selecting {} and all fields within the table'.format(test_table_name))
+ schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+ additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+ _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+ # clear state
+ menagerie.set_state(conn_id, {})
+
+ # run sync job 1 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_pks()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version_1 = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(3, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(5, len(messages))
+ self.assertEqual('activate_version', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+ self.assertEqual('activate_version', messages[4]['action'])
+
+ # verify the persisted schema matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+ # verify replicated records match expectations
+ self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+ self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+ self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+ print("records are correct")
+
+ # grab bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+ # verify state and bookmarks meet expectations
+ self.assertIsNone(state['currently_syncing'])
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertIsNone(bookmark.get('replication_key'))
+ self.assertIsNone(bookmark.get('replication_key_value'))
+ self.assertEqual(table_version_1, bookmark['version'])
+
+ #----------------------------------------------------------------------
+ # invoke the sync job AGAIN and get the same 3 records
+ #----------------------------------------------------------------------
+
+ # run sync job 2 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_pks()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version_2 = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(3, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(4, len(messages))
+ self.assertEqual('upsert', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('activate_version', messages[3]['action'])
+
+ # verify the new table version increased on the second sync
+ self.assertGreater(table_version_2, table_version_1)
+
+ # verify the persisted schema still matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+ # verify replicated records still match expectations
+ self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+ self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+ self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+ # grab bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+ # verify state and bookmarks meet expectations
+ self.assertIsNone(state['currently_syncing'])
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertIsNone(bookmark.get('replication_key'))
+ self.assertIsNone(bookmark.get('replication_key_value'))
+ self.assertEqual(table_version_2, bookmark['version'])
+
+
+ #----------------------------------------------------------------------
+ # invoke the sync job AGAIN following various manipulations to the data
+ #----------------------------------------------------------------------
+
+ with db_utils.get_test_connection('dev') as conn:
+ conn.autocommit = True
+ with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+ # NB | We will perform the following actions prior to the next sync:
+ # [Action (EXPECTED RESULT)]
+
+ # Insert a record
+ # Insert a record to be updated prior to sync
+ # Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+ # Update an existing record
+ # Update a newly inserted record
+
+ # Delete an existing record
+ # Delete a newly inserted record
+
+ # inserting...
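+ # NB | a rough sketch of what db_utils.insert_record is assumed to do with each
+ # record dict below (illustrative only, not part of the fixture):
+ #     columns = ', '.join(record.keys())
+ #     placeholders = ', '.join(['%s'] * len(record))
+ #     cur.execute('INSERT INTO {} ({}) VALUES ({})'.format(table, columns, placeholders),
+ #                 list(record.values()))
+ # which is why the mixed-case "OUR ..." columns are keyed via quote_ident()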
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
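+ # NB | db_utils.update_record(cur, table, pk, data) and
+ # db_utils.delete_record(cur, table, pk) are assumed to issue a plain
+ # UPDATE ... WHERE id = <pk> / DELETE ... WHERE id = <pk>; the matching
+ # expected_records entries are patched in lock-step so the assertions after
+ # sync 3 reflect the manipulated state of the table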
+
+ # an existing record
+ canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+ record_pk = 1
+ our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+ our_ts_tz = nyc_tz.localize(our_ts)
+ updated_data = {
+ "OUR TS TZ": our_ts_tz,
+ "our_double": decimal.Decimal("6.6"),
+ "our_money": "$0.00"
+ }
+ self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+ self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+ self.expected_records[0]["our_money"] = "$0.00"
+
+ db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+ # a newly inserted record
+ canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+ record_pk = 5
+ our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+ our_ts_tz = nyc_tz.localize(our_ts)
+ updated_data = {
+ "OUR TS TZ": our_ts_tz,
+ "our_double": decimal.Decimal("6.6"),
+ "our_money": "$0.00"
+ }
+ self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+ self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+ self.expected_records[4]["our_money"] = "$0.00"
+
+ db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+ # deleting
+ # an existing record
+ record_pk = 2
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ # a newly inserted record
+ record_pk = 6
+ db_utils.delete_record(cur, canon_table_name, record_pk)
+
+ #----------------------------------------------------------------------
+ # invoke the sync job AGAIN after various manipulations
+ #----------------------------------------------------------------------
+
+ # run sync job 3 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_pks()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version_3 = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(4, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(5, len(messages))
+ self.assertEqual('upsert', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+ self.assertEqual('activate_version', messages[4]['action'])
+
+ # verify the new table version increased on the third sync
+ self.assertGreater(table_version_3, table_version_2)
+
+ # verify the persisted schema still matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+ # NB | This is a little tough to track mentally so here's a breakdown of
+ # the order of operations by expected records indexes:
+
+ # Prior to Sync 1
+ # insert 0, 1, 2
+
+ # Prior to Sync 2
+ # No db changes
+
+ # Prior to Sync 3
+ # insert 3, 4, 5
+ # update 0, 4
+ # delete 1, 5
+
+ # Resulting Synced Records: 2, 3, 0, 4
+
+
+ # verify replicated records still match expectations
+ self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+ self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
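+ # (discovery has to surface the stream before the INCREMENTAL replication
+ # metadata with replication-key 'OUR TS TZ' can be attached to it below)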
+
+ # verify discovery produced (at least) 1 expected catalog
+ found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+ if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+ self.assertGreaterEqual(len(found_catalogs), 1)
+
+ # verify the tap discovered the expected streams
+ found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+ self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+ # verify that persisted streams have the correct properties
+ test_catalog = found_catalogs[0]
+ self.assertEqual(test_table_name, test_catalog['stream_name'])
+ print("discovered streams are correct")
+
+ # perform table selection
+ print('selecting {} and all fields within the table'.format(test_table_name))
+ schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+ additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+ _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+ # clear state
+ menagerie.set_state(conn_id, {})
+
+ # run sync job 1 and verify exit codes
+ sync_job_name = runner.run_sync_mode(self, conn_id)
+ exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+ menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+ # get records
+ record_count_by_stream = runner.examine_target_output_file(
+ self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+ )
+ records_by_stream = runner.get_records_from_target_output()
+ table_version = records_by_stream[test_table_name]['table_version']
+ messages = records_by_stream[test_table_name]['messages']
+
+ # verify the expected number of records were replicated
+ self.assertEqual(3, record_count_by_stream[test_table_name])
+
+ # verify the message actions match expectations
+ self.assertEqual(4, len(messages))
+ self.assertEqual('activate_version', messages[0]['action'])
+ self.assertEqual('upsert', messages[1]['action'])
+ self.assertEqual('upsert', messages[2]['action'])
+ self.assertEqual('upsert', messages[3]['action'])
+
+ # verify the persisted schema matches expectations
+ self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+ # verify replicated records match expectations
+ self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+ self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+ self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+ # verify records are in ascending order by replication-key value
+ expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+ self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+ self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+ print("records are correct")
+
+ # grab bookmarked state
+ state = menagerie.get_state(conn_id)
+ bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+ # verify state and bookmarks meet expectations
+ self.assertIsNone(state['currently_syncing'])
+ self.assertIsNone(bookmark.get('lsn'))
+ self.assertEqual(table_version, bookmark['version'])
+ self.assertEqual(expected_replication_key, bookmark['replication_key'])
+ self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
+
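+ # NB | with the bookmark above in place, the next sync is expected to issue
+ # roughly the following (illustrative, not the tap's literal query):
+ #     SELECT * FROM "public"."postgres_incremental_replication_test"
+ #     WHERE "OUR TS TZ" >= <replication_key_value>
+ #     ORDER BY "OUR TS TZ" ASC
+ # so the bookmarked row itself is re-emitted (inclusive comparison), while rows
+ # whose replication-key value stays below the bookmark are skipped
+ 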
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
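+ # (sync 2 should therefore return exactly ids 3, 1 and 6, in that order:
+ # the previously bookmarked row, the updated row whose replication-key value
+ # moved past the bookmark, and the new row with the highest replication-key
+ # value)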
+
+        # verify the first record was the bookmarked record from the previous sync
+        self.assertDictEqual(self.expected_records[2], messages[1]['data'])
+
+        # verify the expected updated record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[0], messages[2]['data'])
+
+        # verify the expected inserted record with a lower replication-key value was NOT replicated
+        actual_record_ids = [message['data']['id'] for message in messages[1:]]
+        expected_record_id = self.expected_records[3]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the deleted record with a lower replication-key value was NOT replicated
+        expected_record_id = self.expected_records[4]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected updated record with a lower replication-key value was NOT replicated
+        expected_record_id = self.expected_records[1]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected inserted record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[5], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # get bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify the bookmarked state matches our expectations
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(bookmark['version'], table_version)
+        self.assertEqual(bookmark['replication_key'], expected_replication_key)
+        self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key])
+
+        #---------------------------------------------------------------------
+        # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
+        #----------------------------------------------------------------------
+
+        # Delete a pre-existing record from the database
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # delete a record with a lower replication key than the previous sync
+                record_pk = 1
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(1, record_count_by_stream[test_table_name])
+
+        # verify messages match our expectations
+        self.assertEqual(2, len(messages))
+        self.assertEqual(messages[0]['action'], 'activate_version')
+        self.assertEqual(messages[1]['action'], 'upsert')
+        self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version)
+
+        # verify replicated records meet our expectations...
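# One consequence of that same rule, assuming the replication key is the
# "OUR TS TZ" column as the updates above suggest: a hard DELETE simply removes
# the row, so a key-based incremental query never sees it again (there is no
# tombstone such as logical replication's _sdc_deleted_at), and the row holding
# the bookmark value is always re-selected. A hedged sketch of the kind of
# query this implies; the SQL the tap actually builds may differ.
illustrative_query = """
    SELECT *
      FROM "public"."postgres_incremental_replication_test"
     WHERE "OUR TS TZ" >= %(bookmark)s
     ORDER BY "OUR TS TZ" ASC
"""
# After record 1 is deleted, only the previously bookmarked record satisfies
# the predicate, which is why this third sync is expected to emit exactly one
# upsert.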
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
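# A small illustrative helper, not drawn from this patch, capturing the pattern
# used above and repeated below: an inserted row becomes the expected log-based
# upsert once the SERIAL-assigned id and a null _sdc_deleted_at are added. (A
# hard DELETE would typically surface as the same row with _sdc_deleted_at
# populated, which is what distinguishes log-based replication from the
# key-based incremental test earlier in this series.)
def expected_upsert(inserted_row, assigned_id):
    record = dict(inserted_row)
    record["id"] = assigned_id
    record["_sdc_deleted_at"] = None
    return record

assert expected_upsert({"cow_name": "betty cow", "cow_age": 21}, 2) == {
    "cow_name": "betty cow", "cow_age": 21, "id": 2, "_sdc_deleted_at": None}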
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't select our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], +
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
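Every delete check above follows the same pattern: under log-based replication a source-side DELETE arrives as an 'upsert' message whose data carries the row's primary key and a non-null _sdc_deleted_at. A minimal sketch of that pattern, illustrative only and not part of the test suite in this patch; it assumes messages has the same shape as records_by_stream['postgres_logical_replication_test']['messages'] above.

def soft_deleted_ids(messages):
    # Collect the ids of rows the tap reported as deleted: an 'upsert' action
    # whose data carries a non-null _sdc_deleted_at marker.
    return [m['data']['id']
            for m in messages
            if m.get('action') == 'upsert'
            and m.get('data', {}).get('_sdc_deleted_at') is not None]

# For the "id IN (4, 5)" sync above this would return [2, 4, 5]: the replayed
# delete of id 2 followed by the two new deletes asserted individually.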
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabbing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas =
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't select our_text_2 + _ =
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
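The bookmark checks repeated after every sync in these logical replication scenarios read the same state shape. A minimal sketch of that shape and of the recurring assertion, illustrative only and not part of this patch: the numeric values are placeholders, and only the keys the tests actually touch ('currently_syncing', 'bookmarks', 'lsn', 'version') are shown.

example_state = {
    'currently_syncing': None,
    'bookmarks': {
        'dev-public-postgres_logical_replication_test': {
            'lsn': 123456789,          # placeholder; must be non-decreasing across syncs
            'version': 1600000000000,  # placeholder; must stay equal to the initial table_version
        }
    }
}

def check_logical_bookmark(state, stream_id, previous_lsn, table_version):
    # The pattern the tests apply after each sync: currently_syncing is cleared,
    # an lsn is present and has not gone backwards, and the table_version is unchanged.
    bookmark = state['bookmarks'][stream_id]
    assert state['currently_syncing'] is None
    assert bookmark['lsn'] is not None and bookmark['lsn'] >= previous_lsn
    assert bookmark['version'] == table_version
    return bookmark['lsn']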
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
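The UPDATE above deliberately writes values that have no JSON representation (NaN for the numeric and double precision columns, +Infinity for the real column) along with an over-precise money amount; the expected record that follows reflects that the non-representable numerics are surfaced as null and money is rounded to two decimal places. A small illustrative sketch of that mapping; the helper below is hypothetical and not part of the tap or this test suite.

import math

def jsonable_number(value):
    # NaN and +/- Infinity cannot be represented in JSON, so the test expects
    # them to come back as null in the target record.
    return None if (math.isnan(value) or math.isinf(value)) else value

assert jsonable_number(float('nan')) is None
assert jsonable_number(float('+inf')) is None
assert jsonable_number(1.2) == 1.2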
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discovered metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + +
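The full-table sync of the view above is expected to bracket its single upsert with activate_version messages. A self-contained sketch of that expectation using the row the test inserts; illustrative only, not part of the patch.

def action_sequence(messages):
    # Order of message actions a FULL_TABLE sync is expected to emit.
    return [m['action'] for m in messages]

sample_messages = [
    {'action': 'activate_version'},
    {'action': 'upsert', 'data': {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size': 'big'}},
    {'action': 'activate_version'},
]

assert action_sequence(sample_messages) == ['activate_version', 'upsert', 'activate_version']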
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
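+                # NB | Unlike the full-table views test above, this table carries an
+                #      updated_at timestamp so the joined chicken_view exposes a column
+                #      that can be selected as the incremental replication key (see
+                #      replication_md in test_run below).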
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
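+        # NB | messages[0] and messages[-1] are the activate_version messages asserted
+        #      above, so messages[1] carries the minimum-value record and messages[2]
+        #      the maximum-value record, matching the insertion order in setUp.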
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+            'our_cidr': 'cidr',
+            'our_inet': 'inet',
+            'our_mac': 'macaddr',
+            'our_alignment_enum': 'alignment',
+            'our_money': 'money',
+            'invalid_bigserial': 'bigint',
+            'invalid_bit_varying': 'bit varying',
+            'invalid_box': 'box',
+            'invalid_bytea': 'bytea',
+            'invalid_circle': 'circle',
+            'invalid_interval': 'interval',
+            'invalid_line': 'line',
+            'invalid_lseg': 'lseg',
+            'invalid_path': 'path',
+            'invalid_pg_lsn': 'pg_lsn',
+            'invalid_point': 'point',
+            'invalid_polygon': 'polygon',
+            'invalid_serial': 'integer',
+            'invalid_smallserial': 'smallint',
+            'invalid_tsquery': 'tsquery',
+            'invalid_tsvector': 'tsvector',
+            'invalid_txid_snapshot': 'txid_snapshot',
+            'invalid_xml': 'xml',
+        }
+
+    @staticmethod
+    def tap_name():
+        return "tap-postgres"
+
+    @staticmethod
+    def name():
+        return "tap_tester_postgres_discovery"
+
+    @staticmethod
+    def get_type():
+        return "platform.postgres"
+
+    @staticmethod
+    def get_credentials():
+        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
+
+    def get_properties(self, original_properties=True):
+        return_value = {
+            'host' : os.getenv('TAP_POSTGRES_HOST'),
+            'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
+            'port' : os.getenv('TAP_POSTGRES_PORT'),
+            'user' : os.getenv('TAP_POSTGRES_USER'),
+            'default_replication_method' : self.FULL_TABLE,
+            'filter_dbs' : 'discovery1'
+        }
+        if not original_properties:
+            if self.default_replication_method is self.LOG_BASED:
+                return_value['wal2json_message_format'] = '1'
+
+            return_value['default_replication_method'] = self.default_replication_method
+
+        return return_value
+
+    def test_run(self):
+        """Parametrized discovery test run against each replication method."""
+
+        self.default_replication_method = self.FULL_TABLE
+        full_table_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(full_table_conn_id)
+
+        self.default_replication_method = self.INCREMENTAL
+        incremental_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(incremental_conn_id)
+
+        # NB | We are able to generate a connection and run discovery with a default replication
+        #      method of logical replication WITHOUT selecting a replication slot. This is not
+        #      ideal behavior. This BUG should not be carried over into hp-postgres, but will not
+        #      be fixed for this tap.
+        self.default_replication_method = self.LOG_BASED
+        log_based_conn_id = connections.ensure_connection(self, original_properties=False)
+        self.discovery_test(log_based_conn_id)
+
+    def discovery_test(self, conn_id):
+        """
+        Basic Discovery Test for a database tap.
+
+        Test Description:
+          Ensure discovery runs without errors and generates a catalog of the expected form.
+
+        Test Cases:
+        - Verify discovery generated the expected catalogs by name.
+        - Verify that the tap_stream_id is in the expected <database>-<schema>-<table> format
+          for each stream.
+        - Verify the catalog is found for a given stream.
+        - Verify there is only 1 top level breadcrumb in metadata for a given stream.
+        - Verify replication key(s) match expectations for a given stream.
+        - Verify primary key(s) match expectations for a given stream.
+        - Verify the replication method matches our expectations for a given stream.
+        - Verify that only primary keys are given the inclusion of automatic in metadata
+          for a given stream.
+        - Verify expected unsupported fields are given the inclusion of unsupported in
+          metadata for a given stream.
+        - Verify that all fields for a given stream which are not unsupported or automatic
+          have inclusion of available.
+        - Verify row-count metadata matches expectations for a given stream.
+        - Verify selected metadata is None for all streams.
+        - Verify is-view metadata is False for a given stream.
+        - Verify no forced-replication-method is present in metadata for a given stream.
+        - Verify schema and db match expectations for a given stream.
+        - Verify schema types match expectations for a given stream.
+        """
+        # TODO Generate multiple tables (streams) and maybe dbs too?
+
+        # run discovery (check mode)
+        check_job_name = runner.run_check_mode(self, conn_id)
+
+        # Verify check exit codes
+        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
+        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # Verify discovery generated a catalog
+        found_catalogs = menagerie.get_catalogs(conn_id)
+        self.assertGreater(len(found_catalogs), 0)
+
+        # Verify discovery generated the expected catalogs by name
+        found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # Verify that the tap_stream_id is in the expected <database>-<schema>-<table> format for each stream
+        found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids)
+
+        # Test by stream
+        for stream in self.expected_check_streams():
+            with self.subTest(stream=stream):
+
+                # Verify the catalog is found for a given stream
+                catalog = next(iter([catalog for catalog in found_catalogs
+                                     if catalog["stream_name"] == stream]))
+                self.assertTrue(isinstance(catalog, dict))
+
+                # collecting expected values
+                expected_primary_keys = self.expected_primary_keys()[stream]
+                expected_replication_keys = set()
+                expected_unsupported_fields = self.expected_unsupported_fields()
+                expected_fields_to_datatypes = self.expected_schema_types()
+                expected_row_count = len(self.recs)
+
+                # collecting actual values...
+                schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id'])
+                stream_metadata = schema_and_metadata["metadata"]
+                top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []]
+                stream_properties = top_level_metadata[0]['metadata']
+                actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, []))
+                actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, []))
+                actual_replication_method = stream_properties.get(self.REPLICATION_METHOD)
+                actual_automatic_fields = set(
+                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
+                    if item.get("metadata").get("inclusion") == "automatic"
+                )
+                actual_unsupported_fields = set(
+                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
+                    if item.get("metadata").get("inclusion") == "unsupported"
+                )
+                actual_fields_to_datatypes = {
+                    item['breadcrumb'][1]: item['metadata'].get('sql-datatype')
+                    for item in stream_metadata[1:]
+                }
+
+                # Verify there is only 1 top level breadcrumb in metadata
+                self.assertEqual(1, len(top_level_metadata))
+
+                # Verify replication key(s) match expectations
+                self.assertSetEqual(
+                    expected_replication_keys, actual_replication_keys
+                )
+
+                # NB | We expect primary keys and replication keys to have inclusion automatic for
+                #      key-based incremental replication. But that is only true for primary keys here.
+                #      This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
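+                # NB | For illustration only: with key-based incremental replication one would
+                #      typically expect metadata entries of roughly this shape for BOTH the
+                #      primary key and the replication key, e.g.
+                #          {"breadcrumb": ["properties", "id"],
+                #           "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}}
+                #      whereas here only the primary key carries inclusion "automatic".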
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_1 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_1, bookmark['version'])
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN and get the same 3 records
+        #----------------------------------------------------------------------
+
+        # run sync job 2 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_2 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('activate_version', messages[3]['action'])
+
+        # verify the new table version increased on the second sync
+        self.assertGreater(table_version_2, table_version_1)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertIsNone(bookmark.get('replication_key'))
+        self.assertIsNone(bookmark.get('replication_key_value'))
+        self.assertEqual(table_version_2, bookmark['version'])
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN following various manipulations to the data
+        #----------------------------------------------------------------------
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # NB | We will perform the following actions prior to the next sync:
+                #      [Action (EXPECTED RESULT)]
+
+                #      Insert a record
+                #      Insert a record to be updated prior to sync
+                #      Insert a record to be deleted prior to sync (NOT REPLICATED)
+
+                #      Update an existing record
+                #      Update a newly inserted record
+
+                #      Delete an existing record
+                #      Delete a newly inserted record
+
+                # inserting...
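+                # NB | Under FULL_TABLE replication deleted rows simply drop out of the next
+                #      sync's result set; no delete/tombstone message is emitted for them,
+                #      which is why the deletes above are marked NOT REPLICATED.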
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
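+                # NB | The updates below touch both a pre-existing row (id 1) and a newly
+                #      inserted row (id 5); the corresponding self.expected_records entries
+                #      are patched in lock-step so the assertDictEqual checks after sync 3
+                #      still match. (db_utils.update_record is assumed to update the row
+                #      identified by record_pk.)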
+                # an existing record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 1
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[0]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+                # a newly inserted record
+                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
+                record_pk = 5
+                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
+                our_ts_tz = nyc_tz.localize(our_ts)
+                updated_data = {
+                    "OUR TS TZ": our_ts_tz,
+                    "our_double": decimal.Decimal("6.6"),
+                    "our_money": "$0.00"
+                }
+                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
+                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
+                self.expected_records[4]["our_money"] = "$0.00"
+
+                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
+
+
+                # deleting
+                # an existing record
+                record_pk = 2
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+                # a newly inserted record
+                record_pk = 6
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        #----------------------------------------------------------------------
+        # invoke the sync job AGAIN after various manipulations
+        #----------------------------------------------------------------------
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_pks()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version_3 = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(4, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(5, len(messages))
+        self.assertEqual('upsert', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+        self.assertEqual('activate_version', messages[4]['action'])
+
+        # verify the table version increased again on the third sync
+        self.assertGreater(table_version_3, table_version_2)
+
+        # verify the persisted schema still matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+
+        # NB | This is a little tough to track mentally so here's a breakdown of
+        #      the order of operations by expected records indexes:
+
+        #      Prior to Sync 1
+        #          insert 0, 1, 2
+
+        #      Prior to Sync 2
+        #          No db changes
+
+        #      Prior to Sync 3
+        #          insert 3, 4, 5
+        #          update 0, 4
+        #          delete 1, 5
+
+        #      Resulting Synced Records: 2, 3, 0, 4
+
+
+        # verify replicated records still match expectations
+        self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
+        self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
+
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
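# NB | As a concrete example of the bookmark above: record 3 carries the greatest + # replication-key value in sync 1, 1997-02-02 02:02:02.722184 America/New_York (EST, UTC-5), + # which expected_ts_tz() formats as '1997-02-02T07:02:02.722184+00:00'. The next sync + # should only pick up rows whose 'OUR TS TZ' is greater than or equal to this bookmark. + +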
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
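+ + # NB | Taken together, the assertions below expect sync 2 to emit exactly three upserts in + # replication-key order: record 3 (the previously bookmarked record, emitted again), record 1 + # (updated to a 2021 'OUR TS TZ'), and record 6 (inserted with a 2111 'OUR TS TZ'); records 2, + # 4 and 5 should be absent.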
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a higher replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #---------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous bookmark + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
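+ + # NB | A hard delete leaves nothing for key-based INCREMENTAL replication to pick up, so sync 3 + # should emit a single upsert: record 6, whose 'OUR TS TZ' equals the replication_key_value + # bookmarked after sync 2.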
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True
+                with conn.cursor() as cur:
+                    # insert another chicken
+                    self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
+                    insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
+
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+
+        # verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        record_count_by_stream = runner.examine_target_output_file(self,
+                                                                   conn_id,
+                                                                   self.expected_sync_streams(),
+                                                                   self.expected_pks())
+        self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1})
+
+        upserts = []
+        for u in runner.get_upserts_from_target_output():
+            self.assertIsNotNone(u.get('_sdc_lsn'))
+            del u['_sdc_lsn']
+            upserts.append(u)
+
+        self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'},
+                          {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}],
+                         upserts)
+
+        print("inserted records are correct")
+
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+        cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
+        self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream dev-public-postgres_logical_replication_test_cows to have an lsn")
+        lsn_cows_2 = cows_bookmark['lsn']
+        self.assertTrue(lsn_cows_2 >= lsn_cows_1)
+
+        chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens']
+        self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream postgres-public-postgres_logical_replication_test_chickens to have an lsn")
+        lsn_chickens_2 = chickens_bookmark['lsn']
+        self.assertTrue(lsn_chickens_2 >= lsn_chickens_1)
+
+        #table_version does NOT change
+        self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for the chickens stream to match version")
+
+        #table_version does NOT change
+        self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for the cows stream to match version")
+
+
+
+SCENARIOS.add(PostgresLogicalRepMultipleDBs)
+import os
+import unittest
+
+import psycopg2.extras
+from psycopg2.extensions import quote_ident
+from tap_tester.scenario import (SCENARIOS)
+import tap_tester.connections as connections
+import tap_tester.menagerie as menagerie
+import tap_tester.runner as runner
+
+import db_utils  # pylint: disable=import-error
+
+
+expected_schemas = {'postgres_logical_replication_test_cows':
+                    {'type': 'object',
+                     'selected': True,
+                     'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'},
+                                    'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True},
+                                    'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}},
+
+                    'postgres_logical_replication_test_chickens':
+                    {'type': 'object',
+                     'selected': True,
+                     'properties': {'chicken_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'},
+                                    'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True},
+                                    'chicken_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}}
+
+
+def insert_record(cursor,
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+
+        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
+        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_cows_1 = bookmark_cows['lsn']
+        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
+
+        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
+        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_chickens_1 = bookmark_chickens['lsn']
+        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job again after adding records
+        #----------------------------------------------------------------------
+        print("inserting 2 more cows and 2 more chickens")
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor() as cur:
+                # insert another cow
+                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
+                insert_record(cur, test_table_name_cows, self.cows_rec_2)
+                # update that cow's expected values
+                self.cows_rec_2['id'] = 2
+                self.cows_rec_2['_sdc_deleted_at'] = None
+
+                # insert another chicken
+                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
+                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
+                # update that chicken's expected values
+                self.chicken_rec_2['id'] = 2
+                self.chicken_rec_2['_sdc_deleted_at'] = None
+
+                # and repeat...
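+                # reviewer note: 'id' comes from the SERIAL primary key and '_sdc_deleted_at' is added by
+                # log-based replication, so both are folded into the expected records before the assertDictEqual checks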
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
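+            # reviewer note: each run starts from a clean slate -- if a previous run left a 'stitch' replication
+            # slot behind it is dropped and recreated below with the wal2json output plugin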
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
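+        # reviewer note: the update in the next section writes 'NaN', '+Infinity' and '$56.811'; the expected
+        # record below expects the NaN/Infinity values to come back as None and the money value rounded to '$56.81'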
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
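The timestamp strings asserted above are hard-coded; the same values can be derived from the inserted datetimes the way the automatic-fields test later in this series does with its expected_ts / expected_ts_tz helpers. A minimal sketch of that derivation (illustrative only, not part of the diff):

    import datetime
    import pytz

    def expected_ts(our_ts):
        # naive timestamps come back from the target as ISO strings with a +00:00 suffix
        return datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00")

    def expected_ts_tz(our_ts_tz):
        # aware timestamps are normalized to UTC before formatting
        return datetime.datetime.strftime(our_ts_tz.astimezone(pytz.utc), "%Y-%m-%dT%H:%M:%S.%f+00:00")

    nyc_tz = pytz.timezone('America/New_York')
    our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333)
    assert expected_ts(our_ts) == '1993-03-03T03:03:03.333333+00:00'
    assert expected_ts_tz(nyc_tz.localize(our_ts)) == '1993-03-03T08:03:03.333333+00:00'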
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
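The update above deliberately sets our_decimal, our_real and our_double to 'NaN' / '+Infinity'; the expected record below maps all three to None because JSON has no encoding for those float values. A quick standard-library illustration of that constraint (an assumption about why the tap emits null, not tap-postgres code):

    import json
    import math

    # With allow_nan=False the encoder refuses NaN and Infinity outright,
    # so a JSON-based pipeline has to fall back to null for these values.
    for value in (math.nan, math.inf):
        try:
            json.dumps(value, allow_nan=False)
        except ValueError:
            print("not JSON-representable:", value)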
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
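For readers unfamiliar with the singer metadata helper used in the view tests above: metadata.to_map keys each annotated entry by its breadcrumb tuple, which is why the expected value is written as a dict keyed by () and ('properties', <column>). A small self-contained sketch (illustrative values, not taken from the diff):

    from singer import metadata

    annotated = [
        {'breadcrumb': [], 'metadata': {'is-view': True, 'table-key-properties': []}},
        {'breadcrumb': ['properties', 'id'], 'metadata': {'inclusion': 'available'}},
    ]
    mapped = metadata.to_map(annotated)
    # entries are addressed by the breadcrumb tuple
    assert mapped[()]['is-view'] is True
    assert mapped[('properties', 'id')]['inclusion'] == 'available'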
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
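+                # Editor's note (not part of the original patch): PostgreSQL's TIME type
+                # accepts values from 00:00:00 up to and including 24:00:00, so
+                # '24:00:00.000000' is legal input; the TODO above is flagging that the
+                # tap appears to hand it back as '00:00:00'. Using '23:59:59.999999'
+                # keeps this maximum-value record unambiguous.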
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
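+        # Editor's note (not part of the original patch): self.subTest() records each
+        # failing field and keeps iterating, so a single run reports every mismatched
+        # column instead of stopping at the first assertEqual failure. The
+        # "MISSING FIELD" sentinel in the .get() calls below also distinguishes a key
+        # that was never replicated from a key replicated with a None value.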
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
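+            # Editor's note (not in the original patch): the strings in this mapping are
+            # compared verbatim against the 'sql-datatype' values that discovery writes
+            # into each field's metadata (see the assertDictEqual at the end of
+            # discovery_test below).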
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
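+                # Editor's note (illustrative; key names are the ones this test already
+                # reads): a per-field metadata entry has the rough shape
+                #   {"breadcrumb": ["properties", "id"],
+                #    "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}}
+                # so the checks below pivot on "inclusion" for automatic/unsupported
+                # fields and on "sql-datatype" for the schema-type comparison.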
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
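[Editor's note] The expected_ts and expected_ts_tz helpers defined just above build the ISO-8601 strings the replicated records are compared against: naive timestamps are formatted as-is with a literal "+00:00" suffix, while timezone-aware values are first converted to UTC. A minimal illustration, reusing record 1's timestamp from this test (America/New_York is at -05:00 on this date; the resulting UTC string matches the '1997-02-02T07:02:02.722184+00:00' array value seen in the preceding patch):

    import datetime
    import pytz

    nyc_tz = pytz.timezone('America/New_York')
    our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)   # record 1's naive timestamp
    our_ts_tz = nyc_tz.localize(our_ts)                        # same wall-clock time, NYC-local

    PostgresFullTable.expected_ts(our_ts)        # -> '1997-02-02T02:02:02.722184+00:00'
    PostgresFullTable.expected_ts_tz(our_ts_tz)  # -> '1997-02-02T07:02:02.722184+00:00'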
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after various manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the third sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
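[Editor's note] With INCREMENTAL replication and "OUR TS TZ" as the replication key, later syncs only need rows at or beyond the bookmarked replication-key value. A rough sketch of the selection this implies; this is an illustration, not the exact SQL tap-postgres generates, and cur, canon_table_name, and bookmark stand for the cursor, table name, and state used elsewhere in this test:

    # select only rows whose replication key is >= the bookmarked value,
    # ordered by the replication key (illustrative query shape only)
    select_sql = ('SELECT * FROM {} '
                  'WHERE "OUR TS TZ" >= %s '
                  'ORDER BY "OUR TS TZ" ASC').format(canon_table_name)
    cur.execute(select_sql, (bookmark['replication_key_value'],))

Because the comparison is inclusive of the bookmarked value, the record bookmarked at the end of this sync is replicated again at the start of the next one, which the assertions after the data manipulations below rely on.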
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #--------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+
+        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
+        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_cows_1 = bookmark_cows['lsn']
+        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
+
+        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
+        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_chickens_1 = bookmark_chickens['lsn']
+        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job again after adding records
+        #----------------------------------------------------------------------
+        print("inserting 2 more cows and 2 more chickens")
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor() as cur:
+                # insert another cow
+                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
+                insert_record(cur, test_table_name_cows, self.cows_rec_2)
+                # update that cow's expected values
+                self.cows_rec_2['id'] = 2
+                self.cows_rec_2['_sdc_deleted_at'] = None
+
+                # insert another chicken
+                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
+                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
+                # update that chicken's expected values
+                self.chicken_rec_2['id'] = 2
+                self.chicken_rec_2['_sdc_deleted_at'] = None
+
+                # and repeat...
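+                # Note: the next sync should pick up exactly the two rows inserted per table here,
+                # in insertion order, each with _sdc_deleted_at set to None (see the message
+                # assertions below).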
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
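+                # NB: the '-0000' offset inserted below is expected to come back
+                # normalized to '+00:00' in the replicated record (see expected_records[-1]).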
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
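+        # Failures inside subTest are reported per field (and the loop keeps going),
+        # so one bad value, e.g. in the 10 MB padded CHAR column, does not drown the
+        # report in a single enormous dict diff.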
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without errors and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the catalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available.
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the catalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after various manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the third sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
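The expected_ts() and expected_ts_tz() helpers above reduce to a UTC conversion plus a single fixed format string. A standalone check (illustrative, reusing record 1's timestamp from setUp) shows the round trip; early March is outside DST, so America/New_York is UTC-5 here. The discovery checks themselves follow.

import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184)
our_ts_tz = nyc_tz.localize(our_ts)                       # EST, i.e. UTC-5

# Same steps as expected_ts_tz(): shift to UTC, then apply the fixed format.
as_utc = our_ts_tz.astimezone(pytz.utc)
expected = datetime.datetime.strftime(as_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00")
assert expected == '1977-03-03T08:03:03.733184+00:00'

# expected_ts() formats the naive timestamp directly, without shifting it.
expected_naive = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00")
assert expected_naive == '1977-03-03T03:03:03.733184+00:00'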
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
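The bookmark assertions above spell out the contract the next two syncs depend on: the saved replication_key_value acts as an inclusive lower bound, so the bookmarked row itself is re-emitted on the following sync. The sketch below (fetch_incremental is a hypothetical helper, not the tap's actual query builder) illustrates that selection under the assumption of a plain SQL comparison on the quoted replication-key column.

from psycopg2.extensions import quote_ident

def fetch_incremental(cur, table_name, replication_key, bookmark_value):
    # Inclusive lower bound (>=): the previously bookmarked row always satisfies
    # its own filter, which is why the later syncs below still emit it.
    column = quote_ident(replication_key, cur)
    sql = 'SELECT * FROM {} WHERE {} >= %s ORDER BY {} ASC'.format(
        quote_ident(table_name, cur), column, column)
    cur.execute(sql, (bookmark_value,))
    return cur.fetchall()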
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
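Tying the action list above back to the three upserts just asserted: only rows whose current replication-key value is at or above the sync 1 bookmark pass the inclusive filter, and the row that was inserted and then deleted (id 5) is simply absent from the table. A small standalone check of that arithmetic, using the fixture timestamps from this test (illustrative only); the record-level assertions follow.

import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
bookmark = nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))  # sync 1 bookmark (id 3)
rows_after_changes = {                                                      # id -> current "OUR TS TZ"
    1: nyc_tz.localize(datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)),     # updated above the bookmark
    2: nyc_tz.localize(datetime.datetime(1990, 4, 4, 4, 4, 4, 733184)),     # updated below the bookmark
    3: bookmark,                                                            # previously bookmarked row
    4: nyc_tz.localize(datetime.datetime(1996, 4, 4, 4, 4, 4, 733184)),     # inserted below the bookmark
    6: nyc_tz.localize(datetime.datetime(2111, 1, 1, 12, 12, 12, 222111)),  # inserted above the bookmark
}
qualifying = [row_id for row_id, ts in rows_after_changes.items() if ts >= bookmark]
qualifying.sort(key=lambda row_id: rows_after_changes[row_id])
assert qualifying == [3, 1, 6]   # ids of the three upserts, in replication-key order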
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #--------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
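Worth noting before the sync 3 checks below: a hard DELETE is invisible to INCREMENTAL replication, and the bookmark only moves forward to the greatest replicated replication-key value, so a sync in which nothing new qualifies still emits exactly one upsert (the bookmarked row) and leaves the bookmark unchanged. A sketch of that bookmark arithmetic, consistent with the state assertions in this test (advance_bookmark is a hypothetical helper; the timestamp string is the UTC rendering of record id 6's replication-key value):

def advance_bookmark(old_value, replicated_values):
    # The bookmark never moves backwards; when the only replicated row is the
    # previously bookmarked one, the saved value stays the same.
    return max([old_value, *replicated_values])

old = '2111-01-01T17:12:12.222111+00:00'   # record id 6's "OUR TS TZ" in UTC
assert advance_bookmark(old, [old]) == old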
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
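# These replication-cursor helpers come from psycopg2's replication protocol
# support (db_utils.get_test_connection(<dbname>, True) presumably returns a
# logical replication connection). Dropping and recreating the slot is roughly
# equivalent to running, on an ordinary connection:
#   SELECT pg_drop_replication_slot('stitch_postgres');
#   SELECT * FROM pg_create_logical_replication_slot('stitch_postgres', 'wal2json');
# so each run of this test starts from a fresh wal2json logical replication slot.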
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
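The delete and bookmark checks above follow one pattern that recurs throughout these tests: a logically replicated DELETE arrives as an 'upsert' message whose data carries a non-null _sdc_deleted_at, the bookmarked lsn may only move forward between syncs, and table_version never changes. A minimal sketch of that pattern as reusable helpers follows; the helper names are hypothetical and not part of tap-tester, they only restate the assertions made inline above.

    def assert_logical_delete(testcase, message, expected_id):
        # A wal2json DELETE surfaces as an 'upsert' whose data is soft-deleted.
        testcase.assertEqual(message['action'], 'upsert')
        testcase.assertIsNotNone(message['data'].get('_sdc_deleted_at'))
        testcase.assertEqual(message['data']['id'], expected_id)

    def assert_bookmark_advanced(testcase, bookmark, previous_lsn, expected_version):
        # LSNs are monotonically non-decreasing across syncs; table_version stays fixed.
        testcase.assertIsNotNone(bookmark['lsn'])
        testcase.assertGreaterEqual(bookmark['lsn'], previous_lsn)
        testcase.assertEqual(bookmark['version'], expected_version)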
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
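+        # Note on the assertions above: under LOG_BASED replication the bookmark tracks
+        # the last confirmed WAL position, so across successive syncs the lsn may only
+        # hold steady or advance (lsn_2 >= lsn_1), while table_version is assigned once
+        # by the initial sync and stays fixed; a changed table_version would signal an
+        # unexpected full re-sync instead of an incremental read of the WAL.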
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
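+# Illustrative sketch (assumed values) of what the quoting helpers above produce,
+# given any live psycopg2 cursor `cur`:
+#   quote_ident('public', cur)                              -> '"public"'
+#   canonicalized_table_name('public', 'chicken_view', cur) -> '"public"."chicken_view"'
+# The double quoting preserves identifier case and spaces before the names are
+# interpolated into the string-formatted DDL/DML used throughout these tests.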
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
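+                             # NB | Postgres itself accepts '24:00:00.000000' as a TIME input, but per the
+                             #      TODO above it appears to come back as '00:00:00' through the tap, so the
+                             #      "maximum" asserted in this record is capped at '23:59:59.999999' instead.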
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
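+        # For reference, the hard-coded datetime strings in self.expected_records line up with
+        # the inserted values roughly as follows (a minimal sketch, assuming the target emits
+        # ISO 8601 with an explicit UTC offset, which is what the literals above use):
+        #
+        #   max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
+        #   max_ts.strftime("%Y-%m-%dT%H:%M:%S.%f") + "+00:00"
+        #   # -> '9999-12-31T23:59:59.999999+00:00', i.e. the expected 'OUR TS' / 'OUR TS TZ' value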
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
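+                # Because of the bug noted above, expected_replication_keys is left as an empty
+                # set for this stream, and only the primary keys are expected to appear as
+                # automatic fields in the assertion below.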
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
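+        # NB | tap_stream_id is expected in '<database>-<schema>-<table>' form, e.g.
+        #      'dev-public-postgres_full_table_replication_array_test' as returned by
+        #      expected_check_streams() above, so this filter keeps only the test table's catalog.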
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
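+                # NB | db_utils.insert_record(cur, table, data) is not part of this patch; it is
+                #      assumed to work like the insert_record helper defined for the logical
+                #      replication array test later in this series, i.e. building one parameterized
+                #      INSERT from the dict keys (sketch only; mixed-case columns such as
+                #      "OUR TS TZ" stay quoted because the keys were built with quote_ident above):
+                #
+                #          INSERT INTO "postgres_full_table_replication_test"
+                #              (our_varchar, "OUR TS TZ", ...)
+                #          VALUES (%s, %s, ...)
+                #
+                #      so psycopg2/Postgres handle the adaptation of the Python values
+                #      (Decimal, datetime, uuid, hstore text, etc.).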
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
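+                # NB | db_utils.update_record(cur, canon_table_name, record_pk, updated_data) is
+                #      likewise assumed to issue a single parameterized statement keyed on the
+                #      primary key, roughly (sketch only, the helper lives in db_utils and is not
+                #      shown in this patch):
+                #
+                #          UPDATE <canon_table_name>
+                #             SET "OUR TS TZ" = %s, our_double = %s, our_money = %s
+                #           WHERE id = %s
+                #
+                #      Under FULL_TABLE replication there is no bookmark comparison, so these
+                #      updates simply appear as fresh upserts in the next sync.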
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after various manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the third sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
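+        # NB | Discovered tap_stream_ids appear to follow the <dbname>-<schema>-<table> convention,
+        #      which is why the expected catalog here is
+        #      'dev-public-postgres_incremental_replication_test'
+        #      (database 'dev', schema 'public', table 'postgres_incremental_replication_test').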
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
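+        # NB | Breakdown of sync 2 by expected_records index, given the sync 1 bookmark of
+        #      record index 2 ('OUR TS TZ' == '1997-02-02T07:02:02.722184+00:00'); as the
+        #      assertions below confirm, a record is replicated only when its replication-key
+        #      value is >= that bookmark:
+        #        index 2 (id 3): equals the bookmark           -> re-replicated
+        #        index 0 (id 1): updated to 2021               -> replicated
+        #        index 5 (id 6): inserted at 2111              -> replicated
+        #        index 3 (id 4): inserted at 1996 (too low)    -> NOT replicated
+        #        index 1 (id 2): updated to 1990 (too low)     -> NOT replicated
+        #        index 4 (id 5): inserted at 2007 then deleted -> NOT replicated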
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #--------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
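+        # NB | Key-based incremental replication has no way to observe the DELETE of id 1, and the
+        #      bookmark comparison is inclusive, so the only record emitted by sync 3 is the one
+        #      whose replication-key value equals the saved bookmark (expected_records[5], id 6).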
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+
+        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
+        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_cows_1 = bookmark_cows['lsn']
+        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
+
+        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
+        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
+        lsn_chickens_1 = bookmark_chickens['lsn']
+        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
+
+
+        #----------------------------------------------------------------------
+        # invoke the sync job again after adding records
+        #----------------------------------------------------------------------
+        print("inserting 2 more cows and 2 more chickens")
+
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor() as cur:
+                # insert another cow
+                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
+                insert_record(cur, test_table_name_cows, self.cows_rec_2)
+                # update that cow's expected values
+                self.cows_rec_2['id'] = 2
+                self.cows_rec_2['_sdc_deleted_at'] = None
+
+                # insert another chicken
+                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
+                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
+                # update that chicken's expected values
+                self.chicken_rec_2['id'] = 2
+                self.chicken_rec_2['_sdc_deleted_at'] = None
+
+                # and repeat...
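+                # as above, record the expected 'id' and '_sdc_deleted_at' values on the
+                # inserted dicts so they can be compared against the target output below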
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
+
+        # verify that persisted streams have the correct properties
+        chicken_catalog = found_catalogs[0]
+
+        self.assertEqual('chicken_view', chicken_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        print('checking discovered metadata for ROOT-CHICKEN_VIEW')
+        md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata']
+
+        self.assertEqual(
+            {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []},
+             ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True},
+             ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
+             ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True},
+             ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
+             ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}},
+            metadata.to_map(md))
+
+
+        # 'ID' selected as view-key-properties
+        replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}]
+
+        connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog,
+                                                           menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']),
+                                                           replication_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+
+        # verify tap and target exit codes
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        record_count_by_stream = runner.examine_target_output_file(self,
+                                                                   conn_id,
+                                                                   self.expected_sync_streams(),
+                                                                   self.expected_pks())
+
+
+        self.assertEqual(record_count_by_stream, { 'chicken_view': 1})
+        records_by_stream = runner.get_records_from_target_output()
+
+        table_version = records_by_stream['chicken_view']['table_version']
+        self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version')
+        self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert')
+        self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version')
+
+        # verifications about individual records
+        for stream, recs in records_by_stream.items():
+            # verify the persisted schema was correct
+            self.assertEqual(recs['schema'],
+                             expected_schemas[stream],
+                             msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))
+
+        actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data']
+
+        expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'}
+        self.assertEqual(actual_chicken_record,
+                         expected_chicken_record,
+                         msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record))
+
+        print("records are correct")
+
+        # verify state and bookmarks
+        state = menagerie.get_state(conn_id)
+
+        chicken_bookmark = state['bookmarks']['postgres-public-chicken_view']
+        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
+        self.assertEqual(chicken_bookmark['version'], table_version,
+                         msg="expected bookmark for stream ROOT-CHICKEN to match version")
+
+
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
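+ # Note: messages[0] and messages[-1] are the activate_version messages verified
+ # above, so messages[1] should carry the minimum-value record and messages[2]
+ # the maximum-value record inserted in setUp; the subTest loops below compare
+ # them field by field.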
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
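+                # NOTE | Illustrative shape of the annotated-schema metadata parsed above and
+                #        below (not asserted verbatim here): one table-level entry with
+                #        breadcrumb [] carrying properties such as 'row-count', 'is-view',
+                #        'schema-name', and 'database-name', plus one entry per column roughly like
+                #        {'breadcrumb': ['properties', 'our_integer'],
+                #         'metadata': {'inclusion': 'available', 'sql-datatype': 'integer', ...}}.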
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
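+        # discovery must surface a catalog for the array test table before it can be selected below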
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
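+                # the three inserts below are assigned ids 4, 5, and 6 by the SERIAL primary
+                # key, matching the ids already recorded in self.expected_records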
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
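+                # only "OUR TS TZ", our_double, and our_money are modified; the matching
+                # self.expected_records entries are patched in place so the later full-row
+                # comparisons still hold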
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after vairous manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the execpted number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
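+        # (tap_stream_id values in this suite follow the '<database>-<schema>-<table>'
+        #  naming convention, e.g. 'dev-public-postgres_incremental_replication_test')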
+
+        # verify discovery produced (at least) 1 expected catalog
+        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
+                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
+        self.assertGreaterEqual(len(found_catalogs), 1)
+
+        # verify the tap discovered the expected streams
+        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
+        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
+
+        # verify that persisted streams have the correct properties
+        test_catalog = found_catalogs[0]
+        self.assertEqual(test_table_name, test_catalog['stream_name'])
+        print("discovered streams are correct")
+
+        # perform table selection
+        print('selecting {} and all fields within the table'.format(test_table_name))
+        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
+        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
+        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
+
+        # clear state
+        menagerie.set_state(conn_id, {})
+
+        # run sync job 1 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        table_version = records_by_stream[test_table_name]['table_version']
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(3, record_count_by_stream[test_table_name])
+
+        # verify the message actions match expectations
+        self.assertEqual(4, len(messages))
+        self.assertEqual('activate_version', messages[0]['action'])
+        self.assertEqual('upsert', messages[1]['action'])
+        self.assertEqual('upsert', messages[2]['action'])
+        self.assertEqual('upsert', messages[3]['action'])
+
+        # verify the persisted schema matches expectations
+        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
+
+        # verify replicated records match expectations
+        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
+        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
+        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # grab bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify state and bookmarks meet expectations
+        self.assertIsNone(state['currently_syncing'])
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(table_version, bookmark['version'])
+        self.assertEqual(expected_replication_key, bookmark['replication_key'])
+        self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
+
+
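+        # NB | The replication_key_value saved above is the maximum 'OUR TS TZ' replicated in
+        #      sync 1. The tap's incremental query is assumed to be inclusive of the bookmark
+        #      (replication key >= bookmarked value), so the bookmarked record itself is
+        #      expected to be re-replicated at the start of the next sync.
+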
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
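+        #   (sync 2 should emit, in ascending replication-key order: the previously
+        #    bookmarked record, the updated record with a higher key, and the newly
+        #    inserted record with the highest key; the other higher-key insert, id 5,
+        #    was deleted before this sync and so is absent)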
+
+        # verify the first record was the bookmarked record from the previous sync
+        self.assertDictEqual(self.expected_records[2], messages[1]['data'])
+
+        # verify the expected updated record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[0], messages[2]['data'])
+
+        # verify the expected inserted record with a lower replication-key value was NOT replicated
+        actual_record_ids = [message['data']['id'] for message in messages[1:]]
+        expected_record_id = self.expected_records[3]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the deleted record with a higher replication-key value was NOT replicated
+        expected_record_id = self.expected_records[4]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected updated record with a lower replication-key value was NOT replicated
+        expected_record_id = self.expected_records[1]['id']
+        self.assertNotIn(expected_record_id, actual_record_ids)
+
+        # verify the expected inserted record with a higher replication-key value was replicated
+        self.assertDictEqual(self.expected_records[5], messages[3]['data'])
+
+        # verify records are in ascending order by replication-key value
+        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
+        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
+
+        print("records are correct")
+
+        # get bookmarked state
+        state = menagerie.get_state(conn_id)
+        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
+
+        # verify the bookmarked state matches our expectations
+        self.assertIsNone(bookmark.get('lsn'))
+        self.assertEqual(bookmark['version'], table_version)
+        self.assertEqual(bookmark['replication_key'], expected_replication_key)
+        self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key])
+
+        #----------------------------------------------------------------------
+        # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
+        #----------------------------------------------------------------------
+
+        # Delete a pre-existing record from the database
+        with db_utils.get_test_connection('dev') as conn:
+            conn.autocommit = True
+            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+
+                # delete a record with a lower replication key than the previous sync's bookmark
+                record_pk = 1
+                db_utils.delete_record(cur, canon_table_name, record_pk)
+
+        # run sync job 3 and verify exit codes
+        sync_job_name = runner.run_sync_mode(self, conn_id)
+        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
+        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
+
+        # get records
+        record_count_by_stream = runner.examine_target_output_file(
+            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
+        )
+        records_by_stream = runner.get_records_from_target_output()
+        messages = records_by_stream[test_table_name]['messages']
+
+        # verify the expected number of records were replicated
+        self.assertEqual(1, record_count_by_stream[test_table_name])
+
+        # verify messages match our expectations
+        self.assertEqual(2, len(messages))
+        self.assertEqual(messages[0]['action'], 'activate_version')
+        self.assertEqual(messages[1]['action'], 'upsert')
+        self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version)
+
+        # verify replicated records meet our expectations...
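+        #   (deletes are assumed to be invisible to INCREMENTAL replication, since no
+        #    tombstone row remains to satisfy the replication-key query, so this sync
+        #    should only re-emit the record matching the previous bookmark)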
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
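# A minimal sketch (hedged, not part of this patch) of the bookmark checks this
# test repeats after every sync: the lsn must be present and non-decreasing, and
# the table_version recorded in state must stay pinned to the version captured on
# the first sync. The helper name and signature below are illustrative only.
def assert_bookmark_advanced(test, state, stream_key, previous_lsn, expected_version):
    """Assert the logical-replication bookmark moved forward without a version bump."""
    bookmark = state['bookmarks'][stream_key]
    test.assertIsNone(state['currently_syncing'])
    test.assertIsNotNone(bookmark['lsn'])
    test.assertGreaterEqual(bookmark['lsn'], previous_lsn)
    test.assertEqual(bookmark['version'], expected_version)
    return bookmark['lsn']

# Illustrative usage, mirroring the inline assertions above:
#   lsn_5 = assert_bookmark_advanced(self, menagerie.get_state(conn_id),
#                                    'dev-public-postgres_logical_replication_test',
#                                    lsn_4, table_version)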
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
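+        # Under log-based replication a DELETE is expected to surface as an
+        # 'upsert' message whose data has '_sdc_deleted_at' populated, rather
+        # than as a separate delete action; the next block deletes id = 3 and
+        # asserts exactly that shape.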
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
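+        # The UPDATE above set our_money to '$56.811' and wrote 'NaN'/'+Infinity'
+        # into our_decimal, our_real, and our_double; the expected record below
+        # reflects money rounded to whole cents ('$56.81') and the non-finite
+        # numeric values coming through as None.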
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
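+        # Note: for a FULL_TABLE sync of a view the bookmark assertions above
+        # only rely on the table version; unlike the incremental view test
+        # there is no replication_key_value to verify.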
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
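+                # The companion table created below only carries the join key
+                # (fk_id) and age; updated_at lives on the first table and is
+                # exposed through the joined view so test_run can select it as
+                # the incremental replication key.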
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) 
+import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_automatic_fields_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_automatic_fields_test" +test_db = "dev" + +class PostgresAutomaticFields(unittest.TestCase): + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + default_replication_method = "" + + def tearDown(self): + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + 
db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 19972, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 19972, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 
'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 19873, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 19873, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_automatic_fields_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_automatic_fields_test' } + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_automatic_fields_test' : {'id'} + } + + def expected_replication_keys(self): + replication_keys = { + 'postgres_automatic_fields_test' : {'our_integer'} + } + + if self.default_replication_method == self.INCREMENTAL: + return replication_keys + else: + return {'postgres_automatic_fields_test' : set()} + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_automatic_fields" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + 'default_replication_method' : 
self.FULL_TABLE, + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "our_integer" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + + def test_run(self): + """Parametrized automatic fields test running against each replication method.""" + + # Test running a sync with no fields selected using full-table replication + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self) + self.automatic_fields_test(full_table_conn_id) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys. + # As a result we cannot run a sync with no fields selected. This BUG should not + # be carried over into hp-postgres, but will not be fixed for this tap. + + # Test running a sync with no fields selected using key-based incremental replication + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # self.automatic_fields_test(incremental_conn_id) + + # Test running a sync with no fields selected using logical replication + self.default_replication_method = self.LOG_BASED + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + db_utils.ensure_replication_slot(cur, test_db) + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.automatic_fields_test(log_based_conn_id) + + + def automatic_fields_test(self, conn_id): + """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and NO FIELDS within the table'.format(test_table_name)) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # expected values + expected_primary_keys = self.expected_primary_keys()[test_table_name] + expected_replication_keys = self.expected_replication_keys()[test_table_name] + expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) + + # collect actual values + record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] + + # verify the message actions match expectations for all replication methods + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # Verify that you get some records for each stream + self.assertGreater(record_count_by_stream[test_table_name], 0) + + # Verify that only the automatic fields are sent to the target + for actual_fields in record_messages_keys: + self.assertSetEqual(expected_automatic_fields, actual_fields) + + +SCENARIOS.add(PostgresAutomaticFields) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error +import datatype_file_reader as dfr # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_datatypes_test" +test_db = "dev" + + +class PostgresDatatypes(unittest.TestCase): + """ + TODO | My Running list + + + Arbitrary Precision Numbers + Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point + when precision is explicitly stated, maximum is 1000 digits + TODOs + - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). + - Cover Maximum precision and scale + - Cover Minimum precision and scale + - Cover NaN + + + Floating-Point Types + - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic + - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits + - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits + - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. + TODOs + - Cover NaN, -Inf, Inf + - + + + Character + - + TODOS + - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + - VARCHAR(10485760) + - Generate a 1 GB string?? + + Binary Types + Bytea | binary string, sequence of octets can be written in hex or escape + TODOs + - Generate different fields for hex and escape + + + Network Address Types + TODOs + - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' + - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 + - For mac do all the input formats + [] '08:00:2b:01:02:03' + [] '08-00-2b-01-02-03' + [] '08002b:010203' + [] '08002b-010203' + [] '0800.2b01.0203' + [] '08002b010203' + + + Datestimes + TODOs + - Test values with second, millisecond and micrsecond precision + + Boolean + TODOs + - Enter all accpetable inputs for True: + TRUE + 't' + 'true' + 'y' + 'yes' + '1' + - Enter all acceptable inputs for False: + FALSE + 'f' + 'false' + 'n' + 'no' + '0' + """ + + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_big VARCHAR(10485760), + our_char CHAR, + our_char_big CHAR(10485760), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + 
our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + our_bigserial BIGSERIAL, + invalid_bit BIT(80), + invalid_bit_varying BIT VARYING(80), + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + our_serial SERIAL, + our_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 wtih minimum values + min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) + our_tz = pytz.timezone('Singapore') # GMT+8 + #min_ts_tz = our_tz.localize(min_ts) # TODO + # our_time = datetime.time(0, 0, 0) + # our_time_tz = our_time.isoformat() + "-04:00" + # our_date = datetime.date(1998, 3, 4) + min_date = datetime.date(1, 1, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'id': 1,# SERIAL PRIMARY KEY, + 'our_char': "a", # CHAR, + 'our_varchar': "", # VARCHAR, + 'our_varchar_big': "", # VARCHAR(10485760), + 'our_char_big': "a", # CHAR(10485760), + 'our_text': " ", # TEXT + 'our_text_2': "", # TEXT, + 'our_integer': -2147483648, # INTEGER, + 'our_smallint': -32768, # SMALLINT, + 'our_bigint': -9223372036854775808, # BIGINT, + 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': False, # BOOLEAN, + 'our_bit': '0', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '12.244.233.165/32', # cidr, + 'our_inet': '12.244.233.165/32', # inet, + 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_bigserial': 1, # BIGSERIAL, + 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), + 'invalid_bit': 80 * '0', # BIT(80), + 'invalid_box': None, # BOX, + 'invalid_bytea': "E'\\000'", # BYTEA, + 'invalid_circle': None, # CIRCLE, + 'invalid_interval': '-178000000 years', # INTERVAL, + 'invalid_line': None, # LINE, + 'invalid_lseg': None, # LSEG, + 'invalid_path': None, # PATH, + 'invalid_pg_lsn': None, # PG_LSN, + 'invalid_point': None, # POINT, + 'invalid_polygon': None, # POLYGON, + 'our_serial': 1, # SERIAL, + 'our_smallserial': 1, # SMALLSERIAL, + 'invalid_tsquery': None, # TSQUERY, + 'invalid_tsvector': None, # TSVECTOR, + 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, + 'invalid_xml': None, # 
XML) + }) + self.expected_records.append({ + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a" + (10485760 - 1) * " ", # padded + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_decimal': '0.000000', + 'OUR TS': '0001-01-01T00:00:00+00:00', + 'OUR TS TZ': '0001-01-01T00:00:00+00:00', + 'OUR TIME': '00:00:00', + 'OUR TIME TZ': '00:00:00+14:59', + 'OUR DATE': '0001-01-01T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': False, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) + + + # record 2 wtih maximum values + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + # nyc_tz = pytz.timezone('America/New_York') + # our_ts_tz = nyc_tz.localize(our_ts) + # our_time = datetime.time(12,11,10) + # our_time_tz = our_time.isoformat() + "-04:00" + max_date = datetime.date(9999, 12, 31) + my_uuid = str(uuid.uuid1()) + base_string = "Bread Sticks From Olive Garden" + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': None, # CHAR, + 'our_varchar': None, # VARCHAR, + 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), + 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), + 'our_text': dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + # '24:00:00.000000' -> 00:00:00 TODO BUG? 
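+                                          # (editor's note on the TODO above, illustrative only) PostgreSQL's documented
+                                          # TIME range runs up to '24:00:00', but Python's datetime.time, which psycopg2
+                                          # uses for TIME columns, stops at 23:59:59.999999; that mismatch is presumably
+                                          # why a '24:00:00' input cannot round-trip cleanly and why the boundary value
+                                          # used here is 23:59:59.999999 instead.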
+ quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': None, # DOUBLE PRECISION, + 'our_real': None, # REAL, # TODO + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': None, # JSON, + 'our_jsonb': None, # JSONB, + 'our_uuid': None, # UUID, + 'our_store': None, # HSTORE, + 'our_citext': None, # CITEXT, + 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, + 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, + 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_alignment_enum': None, # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'invalid_box': '((50, 50), (0, 0))', # BOX, + 'invalid_bytea': "E'\\255'", # BYTEA, + 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, + 'invalid_interval': '178000000 years', # INTERVAL, + 'invalid_line': '{6, 6, 6}', # LINE, + 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, + 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'invalid_pg_lsn': '16/B374D848', # PG_LSN, + 'invalid_point': '(1, 2)', # POINT, + 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, + 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, + 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'invalid_xml': 'bar', # XML) + }) + self.expected_records.append({ + 'id': 2147483647, + 'our_char': self.inserted_records[-1]['our_char'], + 'our_varchar': self.inserted_records[-1]['our_varchar'], + 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], + 'our_char_big': self.inserted_records[-1]['our_char_big'], + 'our_text': self.inserted_records[-1]['our_text'], + 'our_text_2': self.inserted_records[-1]['our_text_2'], + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_decimal':decimal.Decimal('9876543210.02'), # TODO + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+00:00', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_double': None, + 'our_real': None, + 'our_boolean': self.inserted_records[-1]['our_boolean'], + 'our_bit': True, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_store': None, + 'our_citext': None, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_alignment_enum': None, + 'our_money': self.inserted_records[-1]['our_money'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) + + + @staticmethod + def expected_check_streams(): + return { 'postgres_datatypes_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_datatypes_test'} + + def 
expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_datatypes_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', + 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_datatypes" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'dev' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized datatypes test running against each replication method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL + # incremental_conn_id = connections.ensure_connection(self, original_properties=False) + # 
self.datatypes_test(incremental_conn_id) + + # self.default_replication_method = self.LOG_BASED + # log_based_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(log_based_conn_id) + + def datatypes_test(self, conn_id): + """ + Test Description: + Basic Datatypes Test for a database tap. + + Test Cases: + + """ + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the number of records and number of messages match our expectations + expected_record_count = len(self.expected_records) + expected_message_count = expected_record_count + 2 # activate versions + self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) + self.assertEqual(expected_message_count, len(messages)) + + # verify we start and end syncs with an activate version message + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('activate_version', messages[-1]['action']) + + # verify the remaining messages are upserts + actions = {message['action'] for message in messages if message['action'] != 'activate_version'} + self.assertSetEqual({'upsert'}, actions) + + + # NB | assertDictEquals gives ugly output due to HUGE string values in our records so + # use the subTest pattern and go value by value instead. 
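+        # (editor's note, generic illustration of the subTest pattern referenced above)
+        #     for field, want in expected.items():
+        #         with self.subTest(field=field):          # each field reported separately
+        #             self.assertEqual(want, actual.get(field, "MISSING FIELD"))
+        # a failure in one field is recorded and the loop moves on, so a mismatch in a
+        # single huge text column cannot mask problems in the remaining columns.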
+ + + # verify expected minimum values were replicated + expected_record_mins = self.expected_records[0] + for key in expected_record_mins.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) + + + # verify expected maximum values were replicated + expected_record_maxes = self.expected_records[1] + for key in expected_record_maxes.keys(): + with self.subTest(field=key): + self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) + + +SCENARIOS.add(PostgresDatatypes) +import os +import datetime +import unittest +import decimal +import uuid +import json + +from psycopg2.extensions import quote_ident +import psycopg2.extras +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_discovery_test" +test_db = "discovery1" + + +class PostgresDiscovery(unittest.TestCase): + AUTOMATIC_FIELDS = "automatic" + REPLICATION_KEYS = "valid-replication-keys" + PRIMARY_KEYS = "table-key-properties" + FOREIGN_KEYS = "table-foreign-key-properties" + REPLICATION_METHOD = "forced-replication-method" + API_LIMIT = "max-row-limit" + INCREMENTAL = "INCREMENTAL" + FULL_TABLE = "FULL_TABLE" + LOG_BASED = "LOG_BASED" + + UNSUPPORTED_TYPES = { + "BIGSERIAL", + "BIT VARYING", + "BOX", + "BYTEA", + "CIRCLE", + "INTERVAL", + "LINE", + "LSEG", + "PATH", + "PG_LSN", + "POINT", + "POLYGON", + "SERIAL", + "SMALLSERIAL", + "TSQUERY", + "TSVECTOR", + "TXID_SNAPSHOT", + "XML", + } + default_replication_method = "" + + def tearDown(self): + pass + # with db_utils.get_test_connection(test_db) as conn: + # conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db(test_db) + self.maxDiff = None + + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # db_utils.ensure_replication_slot(cur, test_db) + + canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money, + invalid_bigserial BIGSERIAL, + invalid_bit_varying BIT VARYING, + invalid_box BOX, + invalid_bytea BYTEA, + invalid_circle CIRCLE, + invalid_interval INTERVAL, + invalid_line LINE, + invalid_lseg LSEG, + invalid_path PATH, + invalid_pg_lsn PG_LSN, + invalid_point POINT, + invalid_polygon POLYGON, + invalid_serial SERIAL, + invalid_smallserial SMALLSERIAL, + invalid_tsquery TSQUERY, + invalid_tsvector 
TSVECTOR, + invalid_txid_snapshot TXID_SNAPSHOT, + invalid_xml XML) + """.format(canonicalized_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.recs = [] + for _ in range(500): + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + record = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + db_utils.insert_record(cur, test_table_name, record) + self.recs.append(record) + + cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) + + @staticmethod + def expected_check_streams(): + return { 'postgres_discovery_test'} + + def expected_check_stream_ids(self): + """A set of expected table names in format""" + check_streams = self.expected_check_streams() + return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} + + @staticmethod + def expected_primary_keys(): + return { + 'postgres_discovery_test' : {'id'} + } + + @staticmethod + def expected_unsupported_fields(): + return { + 'invalid_bigserial', + 'invalid_bit_varying', + 'invalid_box', + 'invalid_bytea', + 'invalid_circle', + 'invalid_interval', + 'invalid_line', + 'invalid_lseg', + 'invalid_path', + 'invalid_pg_lsn', + 'invalid_point', + 'invalid_polygon', + 'invalid_serial', + 'invalid_smallserial', + 'invalid_tsquery', + 'invalid_tsvector', + 'invalid_txid_snapshot', + 'invalid_xml', + } + @staticmethod + def expected_schema_types(): + return { + 'id': 'integer', # 'serial primary key', + 'our_varchar': 'character varying', # 'varchar' + 'our_varchar_10': 'character varying', # 'varchar(10)', + 'our_text': 'text', + 'our_text_2': 'text', + 'our_integer': 'integer', + 'our_smallint': 'smallint', + 'our_bigint': 'bigint', + 'our_decimal': 'numeric', + 'OUR TS': 'timestamp without time zone', + 'OUR TS TZ': 'timestamp with time zone', + 'OUR TIME': 'time without time zone', + 'OUR TIME TZ': 'time with time zone', + 'OUR DATE': 'date', + 'our_double': 'double precision', + 'our_real': 'real', + 'our_boolean': 'boolean', + 'our_bit': 'bit', + 'our_json': 'json', + 'our_jsonb': 'jsonb', + 'our_uuid': 'uuid', + 'our_store': 'hstore', + 'our_citext': 'citext', 
+ 'our_cidr': 'cidr', + 'our_inet': 'inet', + 'our_mac': 'macaddr', + 'our_alignment_enum': 'alignment', + 'our_money': 'money', + 'invalid_bigserial': 'bigint', + 'invalid_bit_varying': 'bit varying', + 'invalid_box': 'box', + 'invalid_bytea': 'bytea', + 'invalid_circle': 'circle', + 'invalid_interval': 'interval', + 'invalid_line': 'line', + 'invalid_lseg': 'lseg', + 'invalid_path': 'path', + 'invalid_pg_lsn': 'pg_lsn', + 'invalid_point': 'point', + 'invalid_polygon': 'polygon', + 'invalid_serial': 'integer', + 'invalid_smallserial': 'smallint', + 'invalid_tsquery': 'tsquery', + 'invalid_tsvector': 'tsvector', + 'invalid_txid_snapshot': 'txid_snapshot', + 'invalid_xml': 'xml', + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_discovery" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + def get_properties(self, original_properties=True): + return_value = { + 'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : self.FULL_TABLE, + 'filter_dbs' : 'discovery1' + } + if not original_properties: + if self.default_replication_method is self.LOG_BASED: + return_value['wal2json_message_format'] = '1' + + return_value['default_replication_method'] = self.default_replication_method + + return return_value + + def test_run(self): + """Parametrized discovery test running against each replicatio method.""" + + self.default_replication_method = self.FULL_TABLE + full_table_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(full_table_conn_id) + + self.default_replication_method = self.INCREMENTAL + incremental_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(incremental_conn_id) + + # NB | We are able to generate a connection and run discovery with a default replication + # method of logical replication WITHOUT selecting a replication slot. This is not + # ideal behavior. This BUG should not be carried over into hp-postgres, but will not + # be fixed for this tap. + self.default_replication_method = self.LOG_BASED + log_based_conn_id = connections.ensure_connection(self, original_properties=False) + self.discovery_test(log_based_conn_id) + + def discovery_test(self, conn_id): + """ + Basic Discovery Test for a database tap. + + Test Description: + Ensure discovery runs without exit codes and generates a catalog of the expected form + + Test Cases: + - Verify discovery generated the expected catalogs by name. + - Verify that the table_name is in the format for each stream. + - Verify the caatalog is found for a given stream. + - Verify there is only 1 top level breadcrumb in metadata for a given stream. + - Verify replication key(s) match expectations for a given stream. + - Verify primary key(s) match expectations for a given stream. + - Verify the replication method matches our expectations for a given stream. + - Verify that only primary keys are given the inclusion of automatic in metadata + for a given stream. + - Verify expected unsupported fields are given the inclusion of unsupported in + metadata for a given stream. + - Verify that all fields for a given stream which are not unsupported or automatic + have inclusion of available. 
+ - Verify row-count metadata matches expectations for a given stream. + - Verify selected metadata is None for all streams. + - Verify is-view metadata is False for a given stream. + - Verify no forced-replication-method is present in metadata for a given stream. + - Verify schema and db match expectations for a given stream. + - Verify schema types match expectations for a given stream. + """ + # TODO Generate multiple tables (streams) and maybe dbs too? + + # run discovery (check mode) + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # Verify discovery generated a catalog + found_catalogs = menagerie.get_catalogs(conn_id) + self.assertGreater(len(found_catalogs), 0) + + # Verify discovery generated the expected catalogs by name + found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # Verify that the table_name is in the format for each stream + found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) + + # Test by stream + for stream in self.expected_check_streams(): + with self.subTest(stream=stream): + + # Verify the caatalog is found for a given stream + catalog = next(iter([catalog for catalog in found_catalogs + if catalog["stream_name"] == stream])) + self.assertTrue(isinstance(catalog, dict)) + + # collecting expected values + expected_primary_keys = self.expected_primary_keys()[stream] + expected_replication_keys = set() + expected_unsupported_fields = self.expected_unsupported_fields() + expected_fields_to_datatypes = self.expected_schema_types() + expected_row_count = len(self.recs) + + # collecting actual values... + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + stream_metadata = schema_and_metadata["metadata"] + top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] + stream_properties = top_level_metadata[0]['metadata'] + actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) + actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) + actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) + actual_automatic_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "automatic" + ) + actual_unsupported_fields = set( + item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata + if item.get("metadata").get("inclusion") == "unsupported" + ) + actual_fields_to_datatypes = { + item['breadcrumb'][1]: item['metadata'].get('sql-datatype') + for item in stream_metadata[1:] + } + + # Verify there is only 1 top level breadcrumb in metadata + self.assertEqual(1, len(top_level_metadata)) + + # Verify replication key(s) match expectations + self.assertSetEqual( + expected_replication_keys, actual_replication_keys + ) + + # NB | We expect primary keys and replication keys to have inclusion automatic for + # key-based incremental replication. But that is only true for primary keys here. + # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
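+                # (editor's note, abridged sketch of the annotated metadata these
+                # assertions walk; values mirror this table's fixtures)
+                #     [{"breadcrumb": [], "metadata": {"table-key-properties": ["id"],
+                #                                      "schema-name": "public",
+                #                                      "is-view": False, "row-count": 500}},
+                #      {"breadcrumb": ["properties", "id"],
+                #       "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
+                #      {"breadcrumb": ["properties", "our_varchar"],
+                #       "metadata": {"inclusion": "available",
+                #                    "sql-datatype": "character varying"}},
+                #      ...]
+                # the empty-breadcrumb entry carries the table-wide properties asserted on
+                # below, and each per-column entry supplies the inclusion and sql-datatype
+                # values the comprehensions above collect.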
+ + # Verify primary key(s) match expectations + self.assertSetEqual( + expected_primary_keys, actual_primary_keys, + ) + + # Verify the replication method matches our expectations + self.assertIsNone(actual_replication_method) + + # Verify that only primary keys + # are given the inclusion of automatic in metadata. + self.assertSetEqual(expected_primary_keys, actual_automatic_fields) + + + # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 + # The following types were converted and selected, but docs say unsupported. + # Still need to investigate how the tap handles values of these datatypes + # during sync. + KNOWN_MISSING = { + 'invalid_bigserial', # BIGSERIAL -> bigint + 'invalid_serial', # SERIAL -> integer + 'invalid_smallserial', # SMALLSERIAL -> smallint + } + # Verify expected unsupported fields + # are given the inclusion of unsupported in metadata. + self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) + + + # Verify that all other fields have inclusion of available + # This assumes there are no unsupported fields for SaaS sources + self.assertTrue( + all({item.get("metadata").get("inclusion") == "available" + for item in stream_metadata + if item.get("breadcrumb", []) != [] + and item.get("breadcrumb", ["properties", None])[1] + not in actual_automatic_fields + and item.get("breadcrumb", ["properties", None])[1] + not in actual_unsupported_fields}), + msg="Not all non key properties are set to available in metadata") + + # Verify row-count metadata matches expectations + self.assertEqual(expected_row_count, stream_properties['row-count']) + + # Verify selected metadata is None for all streams + self.assertNotIn('selected', stream_properties.keys()) + + # Verify is-view metadata is False + self.assertFalse(stream_properties['is-view']) + + # Verify no forced-replication-method is present in metadata + self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) + + # Verify schema and db match expectations + self.assertEqual(test_schema_name, stream_properties['schema-name']) + self.assertEqual(test_db, stream_properties['database-name']) + + # Verify schema types match expectations + self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) + +SCENARIOS.add(PostgresDiscovery) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import SCENARIOS +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +test_schema_name = "public" +test_table_name = "postgres_drop_table_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +class PostgresDropTable(unittest.TestCase): + + @staticmethod + def name(): + return "tap_tester_postgres_drop_table_field_selection" + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'discovery0' + } + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def expected_check_streams(): + 
return { 'discovery0-public-postgres_drop_table_test'} + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('discovery0') + + with db_utils.get_test_connection('discovery0') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + #pylint: disable=line-too-long + create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # Run discovery + check_job_name = runner.run_check_mode(self, conn_id) + + # Verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # There should not be any tables in this database + with db_utils.get_test_connection('discovery0') as conn: + cur = conn.cursor() + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + # Run discovery again + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + + # When discovery mode finds 0 tables, the tap returns an error + self.assertEqual(exit_status['discovery_exit_status'], 1) + + + + +SCENARIOS.add(PostgresDropTable) +import datetime +import decimal +import json +import os +import unittest +import uuid + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +def 
canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresFullTableRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + 
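+        # (editor's note, illustrative) for a record such as
+        #     {"id": 1, "our_json_array": [psycopg2.extras.Json({"secret": 55})]}
+        # the insert_record helper defined above sorts the keys and builds roughly:
+        #     INSERT INTO "postgres_full_table_replication_array_test" ( id, our_json_array )
+        #     VALUES ( %s,%s::json[] )
+        # only the json/jsonb array columns get an explicit ::json[]/::jsonb[] cast; every
+        # other column relies on psycopg2's ordinary parameter adaptation.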
self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + print("inserting a record") + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'upsert') + 
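+        # (editor's note on the expected values below) the money array inserted above as
+        # '{{$412.1234}}' is expected back as '$412.12' because the money type rounds to
+        # two fractional digits under the default locale; likewise the hstore pairs come
+        # back as dicts and the timestamp-with-time-zone value is normalized to a UTC
+        # ISO-8601 string.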
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], + 'activate_version') + actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] + + expected_inserted_record = {'id': 1, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + print("inserted record is correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNone(bookmark.get('lsn'), + msg="expected bookmark for stream to have NO lsn because we are using full-table replication") + +SCENARIOS.add(PostgresFullTableRepArrays) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import psycopg2.extras +from psycopg2.extensions import quote_ident +import pytz +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +NUMERIC_SCALE=2 +NUMERIC_PRECISION=12 + +expected_schemas = {'postgres_full_table_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties': {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], + 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, + 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +test_schema_name = "public" +test_table_name = "postgres_full_table_replication_test" + + +class PostgresFullTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC({},{}), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + # record 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = 
datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps({'burgers' : 'good'}), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal 4', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'good', + 'our_money': '100.1122', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good"}', + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], + 'our_money' : '$100.11' + }) + # record 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps(["nymn 77"]), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': 
decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '["nymn 77"]', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_alignment_enum' : None, + 'our_money': None + }) + # record 3 + self.inserted_records.append({ + 'our_decimal' : decimal.Decimal('NaN'), + 'our_double' : float('nan'), + 'our_real' : float('-inf') + }) + self.expected_records.append({ + 'id': 3, + # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support + # them and now we are at least consistent(ly wrong). + 'our_decimal' : None, + 'our_double' : None, + 'our_real' : None, + # any field without a set value will be set to NULL + 'OUR TIME': None, + 'our_text': None, + 'our_bit': None, + 'our_integer': None, + 'our_json': None, + 'our_boolean': None, + 'our_jsonb': None, + 'our_bigint': None, + 'OUR TIME TZ': None, + 'our_store': None, + 'OUR TS TZ': None, + 'our_smallint': None, + 'OUR DATE': None, + 'our_varchar': None, + 'OUR TS': None, + 'our_uuid': None, + 'our_varchar_10': None, + 'our_citext': None, + 'our_inet': None, + 'our_cidr': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None + }) + + for record in self.inserted_records: + db_utils.insert_record(cur, test_table_name, record) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_full_table_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_full_table_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_full_table_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_full_table_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'frequency_in_minutes': '1', + # 'default_replication_method' : 'LOG_BASED', + 'filter_dbs' : 'postgres,dev', + # 'ssl' : 'true', # TODO: Disabling for docker-based container + 'itersize' : '10' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_1 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_1, bookmark['version']) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN and get the same 3 records + #---------------------------------------------------------------------- + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_2 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('activate_version', messages[3]['action']) + + # verify the new table version increased on the second sync + self.assertGreater(table_version_2, table_version_1) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[0], messages[0]['data']) + self.assertDictEqual(self.expected_records[1], messages[1]['data']) + self.assertDictEqual(self.expected_records[2], messages[2]['data']) + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_2, bookmark['version']) + + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record + # Insert a record to be updated prior to sync + # Insert a record to be deleted prior to sync (NOT REPLICATED) + + # Update an existing record + # Update a newly inserted record + + # Delete an existing record + # Delete a newly inserted record + + # inserting... 
+ # a new record + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99', + 'our_alignment_enum': None, + }) + # a new record which we will then update prior to sync + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : 
'08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + # a new record to be deleted prior to sync + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None, + 'our_alignment_enum': None, + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + + # updating ... 
+ # an existing record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # a newly inserted record + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 5 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[4]["our_double"] = decimal.Decimal("6.6") + self.expected_records[4]["our_money"] = "$0.00" + + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + + # deleting + # an existing record + record_pk = 2 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # a newly inserted record + record_pk = 6 + db_utils.delete_record(cur, canon_table_name, record_pk) + + #---------------------------------------------------------------------- + # invoke the sync job AGAIN after various manipulations + #---------------------------------------------------------------------- + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_pks() + ) + records_by_stream = runner.get_records_from_target_output() + table_version_3 = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(4, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(5, len(messages)) + self.assertEqual('upsert', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + self.assertEqual('activate_version', messages[4]['action']) + + # verify the new table version increased on the third sync + self.assertGreater(table_version_3, table_version_2) + + # verify the persisted schema still matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + + # NB | This is a little tough to track mentally so here's a breakdown of + # the order of operations by expected records indexes: + + # Prior to Sync 1 + # insert 0, 1, 2 + + # Prior to Sync 2 + # No db changes + + # Prior to Sync 3 + # insert 3, 4, 5 + # update 0, 4 + # delete 1, 5 + + # Resulting Synced Records: 2, 3, 0, 4 + + + # verify replicated records still match expectations + self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert + self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert + 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update + self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertIsNone(bookmark.get('replication_key')) + self.assertIsNone(bookmark.get('replication_key_value')) + self.assertEqual(table_version_3, bookmark['version']) + + +SCENARIOS.add(PostgresFullTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_incremental_replication_test" +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_cidr': {'type': ['null', 
'string']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']} + }}} + +class PostgresIncrementalTable(unittest.TestCase): + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_inet inet, + our_cidr cidr, + our_mac macaddr, + our_money money) + """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) + + cur.execute(create_table_sql) + + # insert fixture data and track expected records + self.inserted_records = [] + self.expected_records = [] + + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + + # record 1 + our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('9876543210.02'), + 'OUR TIME': str(our_time), + 'our_text': 'some text 2', + 'our_bit': True, + 'our_integer': 44101, + 'our_double': decimal.Decimal('1.1'), + 'id': 1, + 'our_json': '{"nymn": 77}', + 'our_boolean': True, + 'our_jsonb': '{"burgers": "good++"}', + 'our_bigint': 1000001, + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'our_smallint': 2, + 'OUR DATE': '1964-07-01T00:00:00+00:00', + 'our_varchar': 'our_varchar 2', + 'OUR TS': self.expected_ts(our_ts), + 'our_uuid': self.inserted_records[0]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext' : 
self.inserted_records[0]['our_citext'], + 'our_inet' : self.inserted_records[0]['our_inet'], + 'our_cidr' : self.inserted_records[0]['our_cidr'], + 'our_mac' : self.inserted_records[0]['our_mac'], + 'our_money' : None + }) + # record 2 + our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 2, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[1]['our_citext'], + 'our_inet' : self.inserted_records[1]['our_inet'], + 'our_cidr' : self.inserted_records[1]['our_cidr'], + 'our_mac' : self.inserted_records[1]['our_mac'], + 'our_money' : '$1,445.57' + }) + # record 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", 'our_text' : + "some text", 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : '1.1', + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(6777777), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'cyclops 1', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_money' : '$1,445.5678' + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567890.01'), + 'our_text': 'some text', + 'our_bit': False, + 'our_integer': 44100, + 'our_double': decimal.Decimal('1.1'), + 'id': 3, + 'our_json': '{"secret": 55}', + 'our_boolean': True, + 'our_jsonb': self.inserted_records[1]['our_jsonb'], + 'our_bigint': 1000000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_varchar': 'our_varchar', + 'our_uuid': self.inserted_records[2]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_10', + 'our_citext': self.inserted_records[2]['our_citext'], + 'our_inet' : self.inserted_records[2]['our_inet'], + 'our_cidr' : self.inserted_records[2]['our_cidr'], + 'our_mac' : self.inserted_records[2]['our_mac'], + 'our_money' : '$1,445.57' + }) + + for rec in self.inserted_records: + db_utils.insert_record(cur, test_table_name, rec) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_incremental_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_incremental_replication_test' } + + @staticmethod + def expected_replication_keys(): + return { + 'postgres_incremental_replication_test' : {'OUR TS TZ'} + } + @staticmethod + def expected_primary_keys(): + return { + 'postgres_incremental_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_incremental_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED' + } + + @staticmethod + def expected_ts_tz(our_ts_tz): + our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) + expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + @staticmethod + def expected_ts(our_ts): + expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") + + return expected_value + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode and verify exit codes + check_job_name = runner.run_check_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify basics of discovery are consistent with expectations... 
+ + # verify discovery produced (at least) 1 expected catalog + found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) + if found_catalog['tap_stream_id'] in self.expected_check_streams()] + self.assertGreaterEqual(len(found_catalogs), 1) + + # verify the tap discovered the expected streams + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + self.assertSetEqual(self.expected_check_streams(), found_catalog_names) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + self.assertEqual(test_table_name, test_catalog['stream_name']) + print("discovered streams are correct") + + # perform table selection + print('selecting {} and all fields within the table'.format(test_table_name)) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + # run sync job 1 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + table_version = records_by_stream[test_table_name]['table_version'] + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual(4, len(messages)) + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records match expectations + self.assertDictEqual(self.expected_records[0], messages[1]['data']) + self.assertDictEqual(self.expected_records[1], messages[2]['data']) + self.assertDictEqual(self.expected_records[2], messages[3]['data']) + + # verify records are in ascending order by replication-key value + expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # grab bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify state and bookmarks meet expectations + self.assertIsNone(state['currently_syncing']) + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(table_version, bookmark['version']) + self.assertEqual(expected_replication_key, bookmark['replication_key']) + self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) + + 
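+ # NB | Editorial sketch, not part of the original patch: the bookmark assertions above and the
+ #      second-sync expectations below assume the tap re-selects rows whose replication-key value
+ #      is greater than or equal to the saved bookmark, roughly:
+ #
+ #          SELECT * FROM "public"."postgres_incremental_replication_test"
+ #          WHERE "OUR TS TZ" >= '<replication_key_value>'   -- placeholder for the bookmarked value
+ #          ORDER BY "OUR TS TZ" ASC;
+ #
+ #      which is why the previously bookmarked record is expected to appear again in the next sync.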
#---------------------------------------------------------------------- + # invoke the sync job AGAIN following various manipulations to the data + #---------------------------------------------------------------------- + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # NB | We will perform the following actions prior to the next sync: + # [Action (EXPECTED RESULT)] + + # Insert a record with a lower replication-key value (NOT REPLICATED) + # Insert a record with a higher replication-key value (REPLICATED) + + # Insert a record with a higher replication-key value and... + # Delete it (NOT REPLICATED) + + # Update a record with a higher replication-key value (REPLICATED) + # Update a record with a lower replication-key value (NOT REPLICATED) + + + # inserting... + # a record with a replication-key value that is lower than the previous bookmark + nyc_tz = pytz.timezone('America/New_York') + our_time_offset = "-04:00" + our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(6,6,6) + our_time_tz = our_time.isoformat() + our_time_offset + our_date = datetime.date(1970, 7, 1) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_citext' : 'cyclops 2', + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': '$0.98789' + }) + self.expected_records.append({ + 'id': 4, + 'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + 'OUR TS' : self.expected_ts(our_ts), + 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), + 'OUR TIME' : str(our_time), + 'OUR TIME TZ' : str(our_time_tz), + 'OUR DATE' : '1970-07-01T00:00:00+00:00', + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : True, + 'our_json': '{"nymn": 77}', + 'our_jsonb': '{"burgers": "good++"}', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_citext': self.inserted_records[-1]['our_citext'], + 'our_store': {"name" : "betty", "dances" :"floor"}, + 'our_cidr': self.inserted_records[-1]['our_cidr'], + 'our_inet': self.inserted_records[-1]['our_inet'], + 'our_mac': self.inserted_records[-1]['our_mac'], + 'our_money': '$0.99' + }) + # a record with a replication-key value that is higher than the previous bookmark + our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + 
our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 5, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + # a record with a replication-key value that is higher than the previous bookmark (to be deleted) + our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1999, 9, 9) + my_uuid = str(uuid.uuid1()) + self.inserted_records.append({ + 'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_3", + 'our_text' : "some text 4", + 'our_integer' : 55200, + 'our_smallint' : 1, + 'our_bigint' : 100000, + 'our_decimal' : decimal.Decimal('1234567899.99'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : decimal.Decimal('1.1'), + 'our_real' : decimal.Decimal('1.2'), + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps('some string'), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money': None, + }) + self.expected_records.append({ + 'our_decimal': decimal.Decimal('1234567899.99'), + 'our_text': 'some text 4', + 'our_bit': False, + 'our_integer': 55200, + 'our_double': decimal.Decimal('1.1'), + 'id': 6, + 'our_json': self.inserted_records[-1]['our_json'], + 'our_boolean': True, + 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], + 'our_bigint': 100000, + 'OUR TS': self.expected_ts(our_ts), + 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), + 'OUR TIME': str(our_time), + 'OUR TIME TZ': str(our_time_tz), + 'our_store': {"name" : "betty", "size" :"small"}, + 'our_smallint': 1, + 'OUR DATE': '1999-09-09T00:00:00+00:00', + 'our_varchar': 'our_varchar 4', + 'our_uuid': self.inserted_records[-1]['our_uuid'], + 'our_real': decimal.Decimal('1.2'), + 'our_varchar_10': 'varchar_3', + 'our_citext' : 'cyclops 3', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + 'our_money' : None + }) + + db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) + db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) + + # update a record with a replication-key value that is higher than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 1 + our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[0]["our_double"] = decimal.Decimal("6.6") + self.expected_records[0]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # update a record with a replication-key value that is lower than the previous bookmark + canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) + record_pk = 2 + our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) + our_ts_tz = nyc_tz.localize(our_ts) + updated_data = { + "OUR TS TZ": our_ts_tz, + "our_double": decimal.Decimal("6.6"), + "our_money": "$0.00" + } + self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) + self.expected_records[1]["our_double"] = decimal.Decimal("6.6") + self.expected_records[1]["our_money"] = "$0.00" + db_utils.update_record(cur, canon_table_name, record_pk, updated_data) + + # delete a newly inserted record with a higher replication key than the previous bookmark + record_pk = 5 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 2 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # grab records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were synced + self.assertEqual(3, record_count_by_stream[test_table_name]) + + # verify the message actions match expectations + self.assertEqual('activate_version', messages[0]['action']) + self.assertEqual('upsert', messages[1]['action']) + self.assertEqual('upsert', messages[2]['action']) + self.assertEqual('upsert', messages[3]['action']) + + # verify the persisted schema matches expectations + self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) + + # verify replicated records meet our expectations... 
+ + # verify the first record was the bookmarked record from the previous sync + self.assertDictEqual(self.expected_records[2], messages[1]['data']) + + # verify the expected updated record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[0], messages[2]['data']) + + # verify the expected inserted record with a lower replication-key value was NOT replicated + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[3]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the deleted record with a higher replication-key value was NOT replicated + expected_record_id = self.expected_records[4]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected updated record with a lower replication-key value was NOT replicated + expected_record_id = self.expected_records[1]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify the expected inserted record with a higher replication-key value was replicated + self.assertDictEqual(self.expected_records[5], messages[3]['data']) + + # verify records are in ascending order by replication-key value + self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) + self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + #---------------------------------------------------------------------- + # run sync AGAIN after deleting a record and get 1 record (prev bookmark) + #---------------------------------------------------------------------- + + # Delete a pre-existing record from the database + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + + # delete a record with a lower replication key than the previous sync + record_pk = 1 + db_utils.delete_record(cur, canon_table_name, record_pk) + + # run sync job 3 and verify exit codes + sync_job_name = runner.run_sync_mode(self, conn_id) + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + # get records + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() + ) + records_by_stream = runner.get_records_from_target_output() + messages = records_by_stream[test_table_name]['messages'] + + # verify the expected number of records were replicated + self.assertEqual(1, record_count_by_stream[test_table_name]) + + # verify messages match our expectations + self.assertEqual(2, len(messages)) + self.assertEqual(messages[0]['action'], 'activate_version') + self.assertEqual(messages[1]['action'], 'upsert') + self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) + + # verify replicated records meet our expectations... 
+ + # verify we did not re-replicate the deleted record + actual_record_ids = [message['data']['id'] for message in messages[1:]] + expected_record_id = self.expected_records[0]['id'] + self.assertNotIn(expected_record_id, actual_record_ids) + + # verify only the previously bookmarked record was synced + self.assertDictEqual(self.expected_records[5], messages[1]['data']) + + print("records are correct") + + # get bookmarked state + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] + + # verify the bookmarked state matches our expectations + self.assertIsNone(bookmark.get('lsn')) + self.assertEqual(bookmark['version'], table_version) + self.assertEqual(bookmark['replication_key'], expected_replication_key) + self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) + + +SCENARIOS.add(PostgresIncrementalTable) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_array_test" + + +MAX_SCALE = 38 +MAX_PRECISION = 100 +expected_schemas = {test_table_name: + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, + "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, + "exclusiveMinimum": True, + "type": ['null', "number", "array"], + "items": { + "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" + }, + "minimum": -10000000000, + "multipleOf": decimal.Decimal('0.01'), + "maximum": 10000000000}}, + 'type': 'object', + 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, + 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, + 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, + 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, + 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, + 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, + 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, + 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, + 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} + }} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql_array = [] + for k in our_keys: + if k == 'our_json_array': + value_sql_array.append("%s::json[]") + elif k == 'our_jsonb_array': + value_sql_array.append("%s::jsonb[]") + else: + value_sql_array.append("%s") + + value_sql = ",".join(value_sql_array) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepArrays(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = 
cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_bit_array BIT(1)[], + our_boolean_array BOOLEAN[], + our_cidr_array CIDR[], + our_citext_array CITEXT[], + our_date_array DATE[], + our_decimal_array NUMERIC(12,2)[], + our_double_array DOUBLE PRECISION[], + our_enum_array ALIGNMENT[], + our_float_array FLOAT[], + our_hstore_array HSTORE[], + our_inet_array INET[], + our_int_array INTEGER[][], + our_int8_array INT8[], + our_json_array JSON[], + our_jsonb_array JSONB[], + our_mac_array MACADDR[], + our_money_array MONEY[], + our_real_array REAL[], + our_smallint_array SMALLINT[], + our_string_array VARCHAR[], + our_text_array TEXT[], + our_time_array TIME[], + our_ts_tz_array TIMESTAMP WITH TIME ZONE[], + our_uuid_array UUID[]) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_array_test'} + + @staticmethod + def expected_sync_streams(): + return { test_table_name } + + @staticmethod + def expected_pks(): + return { + test_table_name : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_arrays" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} + diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual(test_table_name, test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { test_table_name: 0}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream[test_table_name]['table_version'] + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record") + + our_ts_tz = None + our_date = None + our_uuid = str(uuid.uuid1()) + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 2 + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_date = datetime.date(1998, 3, 4) + + self.rec_1 = { + 'our_bit_array' : '{{0,1,1}}', + 'our_boolean_array' : '{true}', + 'our_cidr_array' : '{{192.168.100.128/25}}', + 'our_citext_array' : '{{maGICKal 2}}', + 'our_date_array' : '{{{}}}'.format(our_date), + 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), + 'our_double_array' : '{{1.232323}}', + 'our_enum_array' : '{{bad}}', + 'our_float_array' : '{{5.23}}', + 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", + 'our_inet_array' : '{{192.168.100.128/24}}', + 'our_int_array' : '{{1,2,3},{4,5,6}}', + 'our_int8_array' : '{16,32,64}', + 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], + 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], + 'our_mac_array' : '{{08:00:2b:01:02:03}}', + 'our_money_array' : '{{$412.1234}}', + 'our_real_array' : '{{76.33}}', + 'our_smallint_array' : '{{10,20,30},{40,50,60}}', + 'our_string_array' : '{{one string, two strings}}', + 
'our_text_array' : '{{three string, four}}', + 'our_time_array' : '{{03:04:05}}', + 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), + 'our_uuid_array' : '{{{}}}'.format(our_uuid)} + + + insert_record(cur, test_table_name, self.rec_1) + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { test_table_name: 1 }) + records_by_stream = runner.get_records_from_target_output() + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) + actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] + + expected_inserted_record = {'id': 1, + '_sdc_deleted_at': None, + 'our_bit_array' : [[False, True, True]], + 'our_boolean_array' : [True], + 'our_cidr_array' : [['192.168.100.128/25']], + 'our_citext_array' : [['maGICKal 2']], + 'our_date_array' : ['1998-03-04T00:00:00+00:00'], + 'our_decimal_array' : [decimal.Decimal('1234567890.01')], + 'our_double_array' : [[decimal.Decimal('1.232323')]], + 'our_enum_array' : [['bad']], + 'our_float_array' : [[decimal.Decimal('5.23')]], + 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], + 'our_inet_array' : [['192.168.100.128/24']], + 'our_int_array' : [[1,2,3],[4,5,6]], + 'our_int8_array' : [16,32,64], + 'our_json_array' : [json.dumps({'secret' : 55})], + 'our_jsonb_array' : [json.dumps({'secret' : 69})], + 'our_mac_array' : [['08:00:2b:01:02:03']], + 'our_money_array' : [['$412.12']], + 'our_real_array' : [[decimal.Decimal('76.33')]], + 'our_smallint_array' : [[10,20,30],[40,50,60]], + 'our_string_array' : [['one string', 'two strings']], + 'our_text_array' : [['three string', 'four']], + 'our_time_array' : [['03:04:05']], + 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], + 'our_uuid_array' : ['{}'.format(our_uuid)] + + } + + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k in actual_record_1.keys(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") + + +SCENARIOS.add(PostgresLogicalRepArrays) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleDBs(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + db_utils.ensure_db('postgres') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_dev') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_dev") + cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_cows]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) + + #create dev_cows + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + + with db_utils.get_test_connection('postgres') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch_postgres') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('postgres', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch_postgres") + 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') + + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name_chickens]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) + + + #create postgres_chickens + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'public_postgres_logical_replication_test_cows' : {'id'}, + 'public_postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_dbs" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'include_schemas_in_destination_stream_name' : 'true', + 'debug_lsn': 'true', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + #run sync job + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 1 more cows and 1 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + + with db_utils.get_test_connection('postgres') as conn: + 
conn.autocommit = True + with conn.cursor() as cur: + #insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) + + upserts = [] + for u in runner.get_upserts_from_target_output(): + self.assertIsNotNone(u.get('_sdc_lsn')) + del u['_sdc_lsn'] + upserts.append(u) + + self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, + {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], + upserts) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleDBs) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test_cows': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, + + 'postgres_logical_replication_test_chickens': + {'type': 'object', + 'selected': True, + 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, + 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, + 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} + + +def insert_record(cursor, 
table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name_cows = "postgres_logical_replication_test_cows" +test_table_name_chickens = "postgres_logical_replication_test_chickens" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRepMultipleTables(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + for t in [test_table_name_cows, test_table_name_chickens]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, t]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) + + + cur = conn.cursor() + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + cow_age integer, + cow_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) + cur.execute(create_table_sql) + + create_table_sql = """ + CREATE TABLE {} (id SERIAL PRIMARY KEY, + chicken_age integer, + chicken_name varchar) + """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) + cur.execute(create_table_sql) + + #insert a cow + self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} + insert_record(cur, test_table_name_cows, self.cows_rec_1) + + #insert a chicken + self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chickens_rec_1) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test_cows' : {'id'}, + 'postgres_logical_replication_test_chickens' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_multiple_tables" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 2, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + + test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) + + + test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] + self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, + menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), + additional_md) + connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, + menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), + additional_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') + + table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_cows_1 = bookmark_cows['lsn'] + self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") + + bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") + lsn_chickens_1 = bookmark_chickens['lsn'] + self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding records + #---------------------------------------------------------------------- + print("inserting 2 more cows and 2 more chickens") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + # insert another cow + self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} + insert_record(cur, test_table_name_cows, self.cows_rec_2) + # update that cow's expected values + self.cows_rec_2['id'] = 2 + self.cows_rec_2['_sdc_deleted_at'] = None + + # insert another chicken + self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} + insert_record(cur, test_table_name_chickens, self.chicken_rec_2) + # update that cow's expected values + self.chicken_rec_2['id'] = 2 + self.chicken_rec_2['_sdc_deleted_at'] = None + + # and repeat... 
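+                # the *_rec_3 fixtures below follow the same pattern as *_rec_2: insert the
+                # row, then fold the generated serial id and a null _sdc_deleted_at into the
+                # dict so it matches the record the tap is expected to emit on the next sync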
+ + self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} + insert_record(cur, test_table_name_cows, self.cows_rec_3) + self.cows_rec_3['id'] = 3 + self.cows_rec_3['_sdc_deleted_at'] = None + + + self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} + insert_record(cur, test_table_name_chickens, self.chicken_rec_3) + self.chicken_rec_3['id'] = 3 + self.chicken_rec_3['_sdc_deleted_at'] = None + + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) + records_by_stream = runner.get_records_from_target_output() + chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] + cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] + + self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) + self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) + self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) + self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) + + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] + self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_cows_2 = cows_bookmark['lsn'] + self.assertTrue(lsn_cows_2 >= lsn_cows_1) + + chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] + self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_chickens_2 = chickens_bookmark['lsn'] + self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) + + #table_version does NOT change + self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #table_version does NOT change + self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + +SCENARIOS.add(PostgresLogicalRepMultipleTables) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db('dev') + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + 
cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 
= {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + #insert fixture data 3 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_3 = {'our_varchar' : "our_varchar 3", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_3) + + #insert fixture data 4 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_4 = {'our_varchar' : "our_varchar 4", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_4) + + + @staticmethod + def expected_check_streams(): + return { 
'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '1' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], + 
'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 5") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_5 = {'our_varchar' : "our_varchar 5", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_5) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 5', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_5['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 5, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_5['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_5['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_5['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) + + # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + #the 1st message will be the previous insert + insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in insert_message.items(): + self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) + + + #the 2nd message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + print("deleted record is correct") + + state = 
menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_4 = bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after deleting a record using the 'id IN (, )' format + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #first record will be the previous delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 2) + + + + #the 2nd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + print("deleted record is correct") + + #the 3rd message will be the more recent delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + print("deleted record is correct") + + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_5 = bookmark['lsn'] + self.assertTrue(lsn_5 >= lsn_4) + + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + + 
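+        # Note on the delete syncs above: with log-based replication a DELETE
+        # still arrives as an 'upsert' message whose _sdc_deleted_at field is
+        # populated; across these syncs only the bookmark's lsn moves forward
+        # while the stream's table version stays fixed.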
#---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) + #first record will be the previous first delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 4) + + #second record will be the previous second delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 5) + + #third record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_6 = chicken_bookmark['lsn'] + self.assertTrue(lsn_6 >= lsn_5) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we will get the previous update record again + self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) + # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives + update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] + self.assertEqual(update_message['action'], 'upsert') + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_7 = chicken_bookmark['lsn'] + self.assertTrue(lsn_7 >= lsn_6) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import decimal +import unittest +import datetime +import uuid +import json + +import pytz +import psycopg2.extras +from psycopg2.extensions import quote_ident +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = 
{'postgres_logical_replication_test': + {'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }, + 'type': 'object', + 'properties': {'our_boolean': {'type': ['null', 'boolean']}, + '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, + 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, + 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, + 'our_real': {'type': ['null', 'number']}, + 'our_uuid': {'type': ['null', 'string']}, + 'our_store': {'type': ['null', 'object'], 'properties' : {}}, + 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, + 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], + 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, + 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, + 'our_jsonb': {'type': ['null', 'string']}, + 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, + 'our_text': {'type': ['null', 'string']}, + 'our_text_2': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, + 'our_double': {'type': ['null', 'number']}, + 'our_varchar': {'type': ['null', 'string']}, + 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, + 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, + 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, + 'OUR TIME': {'type': ['null', 'string']}, + 'OUR TIME TZ': {'type': ['null', 'string']}, + 'our_bit': {'type': ['null', 'boolean']}, + 'our_citext': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, + 'our_mac': {'type': ['null', 'string']}, + 'our_alignment_enum': {'type': ['null', 'string']}, + 'our_money': {'type': ['null', 'string']}}}} + + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + +test_schema_name = "public" +test_table_name = "postgres_logical_replication_test" + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + + +class PostgresLogicalRep(unittest.TestCase): + def tearDown(self): + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db("dev") + + self.maxDiff = None + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT EXISTS (SELECT 1 + FROM pg_replication_slots + WHERE slot_name = 'stitch') """) + + old_slot = cur.fetchone()[0] + with db_utils.get_test_connection('dev', True) as conn2: + with conn2.cursor() as cur2: + if old_slot: + cur2.drop_replication_slot("stitch") + cur2.create_replication_slot('stitch', output_plugin='wal2json') + + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s);""", + [test_schema_name, test_table_name]) + old_table = cur.fetchone()[0] + + if old_table: + cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + + cur = conn.cursor() + cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) + if cur.fetchone()[0] is None: + cur.execute(""" CREATE EXTENSION hstore; """) + + cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") + cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) + cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) + + + create_table_sql = """ +CREATE TABLE {} (id SERIAL PRIMARY KEY, + our_varchar VARCHAR, + our_varchar_10 VARCHAR(10), + our_text TEXT, + our_text_2 TEXT, + our_integer INTEGER, + our_smallint SMALLINT, + our_bigint BIGINT, + our_decimal NUMERIC(12,2), + "OUR TS" TIMESTAMP WITHOUT TIME ZONE, + "OUR TS TZ" TIMESTAMP WITH TIME ZONE, + "OUR TIME" TIME WITHOUT TIME ZONE, + "OUR TIME TZ" TIME WITH TIME ZONE, + "OUR DATE" DATE, + our_double DOUBLE PRECISION, + our_real REAL, + our_boolean BOOLEAN, + our_bit BIT(1), + our_json JSON, + our_jsonb JSONB, + our_uuid UUID, + our_store HSTORE, + our_citext CITEXT, + our_cidr cidr, + our_inet inet, + our_mac macaddr, + our_alignment_enum ALIGNMENT, + our_money money) + """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) + + cur.execute(create_table_sql) + + #insert fixture data 1 + our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(12,11,10) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1998, 3, 4) + my_uuid = str(uuid.uuid1()) + + self.rec_1 = {'our_varchar' : "our_varchar", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44100, + 'our_smallint' : 1, 'our_bigint' : 1000000, + 'our_decimal' : decimal.Decimal('1234567890.01'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '0', + 'our_json' : json.dumps({'secret' : 55}), + 'our_jsonb' : json.dumps(['burgers are good']), + 'our_uuid' : my_uuid, + 'our_store' : 'size=>"small",name=>"betty"', + 'our_citext': 'maGICKal', + 'our_cidr' : '192.168.100.128/25', + 'our_inet': '192.168.100.128/24', + 'our_mac' : '08:00:2b:01:02:03', + 
'our_alignment_enum': 'bad'} + + + insert_record(cur, test_table_name, self.rec_1) + + #insert fixture data 2 + our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(10,9,8) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1964, 7, 1) + my_uuid = str(uuid.uuid1()) + + self.rec_2 = {'our_varchar' : "our_varchar 2", + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text 2", + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 44101, + 'our_smallint' : 2, + 'our_bigint' : 1000001, + 'our_decimal' : decimal.Decimal('9876543210.02'), + quote_ident('OUR TS', cur) : our_ts, + quote_ident('OUR TS TZ', cur) : our_ts_tz, + quote_ident('OUR TIME', cur) : our_time, + quote_ident('OUR TIME TZ', cur) : our_time_tz, + quote_ident('OUR DATE', cur) : our_date, + 'our_double' : 1.1, + 'our_real' : 1.2, + 'our_boolean' : True, + 'our_bit' : '1', + 'our_json' : json.dumps({'nymn' : 77}), + 'our_jsonb' : json.dumps({'burgers' : 'good++'}), + 'our_uuid' : my_uuid, + 'our_store' : 'dances=>"floor",name=>"betty"', + 'our_citext': 'maGICKal 2', + 'our_cidr' : '192.168.101.128/25', + 'our_inet': '192.168.101.128/24', + 'our_mac' : '08:00:2b:01:02:04', + } + + insert_record(cur, test_table_name, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'dev-public-postgres_logical_replication_test'} + + @staticmethod + def expected_sync_streams(): + return { 'postgres_logical_replication_test' } + + @staticmethod + def expected_pks(): + return { + 'postgres_logical_replication_test' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def name(): + return "tap_tester_postgres_logical_replication_v2_message" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'LOG_BASED', + 'logical_poll_total_seconds': '10', + 'wal2json_message_format': '2' + } + + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + + self.assertGreaterEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + test_catalog = found_catalogs[0] + + self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) + + print("discovered streams are correct") + + additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] + #don't selcted our_text_2 + _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, + menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), + additional_md, + ['our_text_2']) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['postgres_logical_replication_test']['table_version'] + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], + 'activate_version') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], + 'upsert') + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], + 'activate_version') + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream to have an lsn") + lsn_1 = bookmark['lsn'] + + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job again after adding a record + #---------------------------------------------------------------------- + print("inserting a record 3") + + with db_utils.get_test_connection('dev') as conn: + conn.autocommit = True + with conn.cursor() as cur: + #insert fixture data 3 + our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) + nyc_tz = pytz.timezone('America/New_York') + our_ts_tz = nyc_tz.localize(our_ts) + our_time = datetime.time(3,4,5) + our_time_tz = our_time.isoformat() + "-04:00" + our_date = datetime.date(1933, 3, 3) + my_uuid = str(uuid.uuid1()) + + #STRINGS: + #OUR TS: '1993-03-03 03:03:03.333333' + #OUR TS TZ: '1993-03-03 08:03:03.333333+00' + #'OUR TIME': '03:04:05' + #'OUR TIME TZ': '03:04:05+00' + self.rec_3 = {'our_varchar' : "our_varchar 3", # str + 'our_varchar_10' : "varchar13", # str + 'our_text' : "some text 3", #str + 'our_text_2' : "NOT SELECTED", + 'our_integer' : 96000, #int + 'our_smallint' : 3, # int + 'our_bigint' : 3000000, #int + 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a + quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' + quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' + quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' + quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' + quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a + 'our_double' : 3.3, #3.3 / our_double is a + 'our_real' : 6.6, #6.6 / our_real is a + 'our_boolean' : True, #boolean + 'our_bit' : '1', #string + 'our_json' : json.dumps({'secret' : 33}), #string + 'our_jsonb' : 
json.dumps(['burgers make me hungry']), + 'our_uuid' : my_uuid, #string + 'our_store' : 'jumps=>"high",name=>"betty"', #string + 'our_citext': 'maGICKal 3', + 'our_cidr' : '192.168.102.128/32', + 'our_inet': '192.168.102.128/32', + 'our_mac' : '08:00:2b:01:02:05', + 'our_money': '$412.1234' + } + + insert_record(cur, test_table_name, self.rec_3) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + self.assertTrue(len(records_by_stream) > 0) + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) + actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] + + expected_inserted_record = {'our_text': 'some text 3', + 'our_real': decimal.Decimal('6.6'), + '_sdc_deleted_at': None, + 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, + 'our_bigint': 3000000, + 'our_varchar': 'our_varchar 3', + 'our_double': decimal.Decimal('3.3'), + 'our_bit': True, + 'our_uuid': self.rec_3['our_uuid'], + 'OUR TS': '1993-03-03T03:03:03.333333+00:00', + 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', + 'OUR TIME': '03:04:05', + 'OUR TIME TZ': '03:04:05-04:00', + 'OUR DATE': '1933-03-03T00:00:00+00:00', + 'our_decimal': decimal.Decimal('1234567890.03'), + 'id': 3, + 'our_varchar_10': 'varchar13', + 'our_json': '{"secret": 33}', + 'our_jsonb': self.rec_3['our_jsonb'], + 'our_smallint': 3, + 'our_integer': 96000, + 'our_boolean': True, + 'our_citext': 'maGICKal 3', + 'our_cidr': self.rec_3['our_cidr'], + 'our_inet': '192.168.102.128', + 'our_mac': self.rec_3['our_mac'], + 'our_alignment_enum' : None, + 'our_money' :'$412.12' + } + self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) + + for k,v in actual_record_1.items(): + self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) + + self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') + print("inserted record is correct") + + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_2 = chicken_bookmark['lsn'] + + self.assertTrue(lsn_2 >= lsn_1) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + 
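+        # The remaining syncs in this test exercise a delete, an update, and a
+        # final no-op run; each one should only advance the bookmark's lsn, and
+        # the stream's table version must not change.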
#---------------------------------------------------------------------- + # invoke the sync job again after deleting a record + #---------------------------------------------------------------------- + print("delete row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + #the message will be the delete + delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(delete_message['action'], 'upsert') + + sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') + self.assertIsNotNone(sdc_deleted_at) + self.assertEqual(delete_message['data']['id'], 3) + print("deleted record is correct") + + state = menagerie.get_state(conn_id) + bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + + self.assertIsNotNone(bookmark['lsn'], + msg="expected bookmark for stream ROOT-CHICKEN to have an scn") + + lsn_3 = bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(bookmark['version'], table_version, + msg="expected bookmark for stream postgres_logical_replication_test to match version") + #---------------------------------------------------------------------- + # invoke the sync job again after updating a record + #---------------------------------------------------------------------- + print("updating row from source db") + with db_utils.get_test_connection('dev') as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) + + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) + records_by_stream = runner.get_records_from_target_output() + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + 
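+        # The UPDATE above wrote 'NaN' into our_decimal/our_double and
+        # '+Infinity' into our_real; the expected record below reflects those
+        # coming back as None, with our_money rounded to two decimal places
+        # ('$56.81').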
+ # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) + + #record will be the new update + update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] + self.assertEqual(update_message['action'], 'upsert') + + expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', + 'id' : 1, + 'our_varchar_10' : "varchar_10", + 'our_text' : "some text", + 'our_integer' : 44100, + 'our_smallint' : 1, + 'our_bigint' : 1000000, + 'our_decimal' : None, + 'OUR TS': '1997-02-02T02:02:02.722184+00:00', + 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', + 'OUR TIME' : '12:11:10', + 'OUR TIME TZ' : '12:11:10-04:00', + 'OUR DATE': '1998-03-04T00:00:00+00:00', + 'our_double' : None, + 'our_real' : None, + 'our_boolean' : True, + 'our_bit' : False, + 'our_json' : '{"secret": 55}', + 'our_jsonb' : self.rec_1['our_jsonb'], + 'our_uuid' : self.rec_1['our_uuid'], + '_sdc_deleted_at' : None, + 'our_store' : {'name' : 'betty', 'size' : 'small' }, + 'our_citext': 'maGICKal', + 'our_cidr': self.rec_1['our_cidr'], + 'our_inet': self.rec_1['our_inet'], + 'our_mac': self.rec_1['our_mac'], + 'our_alignment_enum' : 'bad', + 'our_money' : '$56.81' + } + + self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), + msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) + + + for k,v in update_message['data'].items(): + self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) + + print("updated record is correct") + + #check state again + state = menagerie.get_state(conn_id) + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_3 = chicken_bookmark['lsn'] + self.assertTrue(lsn_3 >= lsn_2) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + + + #---------------------------------------------------------------------- + # invoke the sync job one last time. 
should only get the PREVIOUS update + #---------------------------------------------------------------------- + sync_job_name = runner.run_sync_mode(self, conn_id) + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + #we should not get any records + self.assertEqual(record_count_by_stream, {}) + + #check state again + state = menagerie.get_state(conn_id) + chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertIsNotNone(chicken_bookmark['lsn'], + msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") + lsn_4 = chicken_bookmark['lsn'] + self.assertTrue(lsn_4 >= lsn_3) + + #table_version does NOT change + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream public-postgres_logical_replication_test to match version") + +SCENARIOS.add(PostgresLogicalRep) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = 
"postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsFullTable(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_full_table" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER') + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + + 
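+# For the FULL_TABLE view sync above, the state check only asserts the
+# bookmarked table version for 'postgres-public-chicken_view'.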
+SCENARIOS.add(PostgresViewsFullTable) +import os +import datetime +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + + +expected_schemas = {'chicken_view': + {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'updated_at': {'format': 'date-time', + 'type': ['null', 'string']}}, + 'type': 'object', + 'definitions' : { + 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, + 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, + 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, + 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, + 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, + 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} + }}} + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsIncrementalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + updated_at TIMESTAMP WITH TIME ZONE, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + 
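+            # updated_at is added to this source table so the joined view
+            # exposes a timestamp column; the test later selects it as the
+            # replication key for INCREMENTAL replication of chicken_view.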
+ cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_incremental_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod + def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties, updated_at is replication_key + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + + self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) + records_by_stream = runner.get_records_from_target_output() + + table_version = records_by_stream['chicken_view']['table_version'] + self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) + self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') + self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') + + # verifications about individual records + for stream, recs in records_by_stream.items(): + # verify the persisted schema was correct + self.assertEqual(recs['schema'], + expected_schemas[stream], + msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) + + actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] + + expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} + self.assertEqual(actual_chicken_record, + expected_chicken_record, + msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) + + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + + chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] + self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") + self.assertEqual(chicken_bookmark['version'], table_version, + msg="expected bookmark for stream ROOT-CHICKEN to match version") + self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') + self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') + print("bookmarks are correct") + + # TODO Verify expected fields have inclusion of 'automatic' + +SCENARIOS.add(PostgresViewsIncrementalReplication) +import os +import unittest + +import psycopg2.extras +from psycopg2.extensions import quote_ident +from singer import metadata +from tap_tester.scenario import (SCENARIOS) +import tap_tester.connections as connections +import tap_tester.menagerie as menagerie +import tap_tester.runner as runner + +import db_utils # pylint: disable=import-error + +expected_schemas = {'chicken_view': {'properties': + {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} + + +def canonicalized_table_name(schema, table, cur): + return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) + +def insert_record(cursor, table_name, data): + our_keys = list(data.keys()) + our_keys.sort() + our_values = [data.get(key) for key in our_keys] + + columns_sql = ", \n ".join(our_keys) + value_sql = ",".join(["%s" for i in range(len(our_keys))]) + + insert_sql = """ INSERT INTO {} + ( {} ) + VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) + cursor.execute(insert_sql, our_values) + + + +test_schema_name = "public" +test_table_name_1 = "postgres_views_full_table_replication_test" +test_table_name_2 = "postgres_views_full_table_replication_test_2" +test_view = 'chicken_view' + +class PostgresViewsLogicalReplication(unittest.TestCase): + def setUp(self): + db_utils.ensure_environment_variables_set() + + db_utils.ensure_db() + + self.maxDiff = None + + with db_utils.get_test_connection() as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + for table in [test_table_name_1, test_table_name_2]: + old_table = cur.execute("""SELECT EXISTS ( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = %s + AND table_name = %s)""", + [test_schema_name, table]) + old_table = cur.fetchone()[0] + if old_table: + cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) + + + cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) + cur.execute("""CREATE TABLE {} + (id SERIAL PRIMARY KEY, + name VARCHAR, + size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + + cur.execute("""CREATE TABLE {} + (fk_id bigint, + age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + cur.execute("""CREATE VIEW {} AS + (SELECT * + FROM {} + join {} + on {}.id = {}.fk_id + )""".format(quote_ident(test_view, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur), + canonicalized_table_name(test_schema_name, test_table_name_1, cur), + canonicalized_table_name(test_schema_name, test_table_name_2, cur))) + + self.rec_1 = { 'name' : 'fred', 'size' : 'big' } + insert_record(cur, test_table_name_1, self.rec_1) + + cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) + fk_id = cur.fetchone()[0] + + self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } + insert_record(cur, test_table_name_2, self.rec_2) + + @staticmethod + def expected_check_streams(): + return { 'postgres-public-chicken_view'} + + @staticmethod + def expected_sync_streams(): + return { 'chicken_view' } + + @staticmethod + def name(): + return "tap_tester_postgres_views_logical_replication" + + @staticmethod + def expected_pks(): + return { + 'chicken_view' : {'id'} + } + + @staticmethod + def tap_name(): + return "tap-postgres" + + @staticmethod + def get_type(): + return "platform.postgres" + + @staticmethod + def get_credentials(): + return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} + + @staticmethod 
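    # A minimal usage sketch of the insert_record helper defined above, assuming
    # self.rec_1 = {'name': 'fred', 'size': 'big'} as set up in this class:
    # keys are sorted, each column goes on its own line, and the values are
    # passed separately as parameters so psycopg2 handles the quoting. The
    # generated statement is roughly:
    #
    #     INSERT INTO "postgres_views_full_table_replication_test"
    #     ( name,
    #       size )
    #     VALUES ( %s,%s )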
+ def get_properties(): + return {'host' : os.getenv('TAP_POSTGRES_HOST'), + 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), + 'port' : os.getenv('TAP_POSTGRES_PORT'), + 'user' : os.getenv('TAP_POSTGRES_USER'), + 'default_replication_method' : 'FULL_TABLE' + } + + def test_run(self): + conn_id = connections.ensure_connection(self) + + # run in check mode + check_job_name = runner.run_check_mode(self, conn_id) + + # verify check exit codes + exit_status = menagerie.get_exit_status(conn_id, check_job_name) + menagerie.verify_check_exit_status(self, exit_status, check_job_name) + + # verify the tap discovered the right streams + found_catalogs = [fc for fc + in menagerie.get_catalogs(conn_id) + if fc['tap_stream_id'] in self.expected_check_streams()] + + self.assertEqual(len(found_catalogs), + 1, + msg="unable to locate schemas for connection {}".format(conn_id)) + + found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + diff = self.expected_check_streams().symmetric_difference(found_catalog_names) + self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) + + # verify that persisted streams have the correct properties + chicken_catalog = found_catalogs[0] + + self.assertEqual('chicken_view', chicken_catalog['stream_name']) + print("discovered streams are correct") + + print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] + + self.assertEqual( + {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, + ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, + ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, + ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, + ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, + metadata.to_map(md)) + + + # 'ID' selected as view-key-properties + replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] + + connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, + menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), + replication_md) + + # clear state + menagerie.set_state(conn_id, {}) + + sync_job_name = runner.run_sync_mode(self, conn_id) + + # verify tap and target exit codes + exit_status = menagerie.get_exit_status(conn_id, sync_job_name) + + self.assertEqual(exit_status['tap_exit_status'], 1) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + + record_count_by_stream = runner.examine_target_output_file(self, + conn_id, + self.expected_sync_streams(), + self.expected_pks()) + + self.assertEqual(record_count_by_stream, {}) + print("records are correct") + + # verify state and bookmarks + state = menagerie.get_state(conn_id) + self.assertEqual(state, {}, msg="expected state to be empty") + + + + +SCENARIOS.add(PostgresViewsLogicalReplication) From 0454483f1042b8a37df37a624657b447ff4f5bb9 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Fri, 2 Apr 2021 21:30:23 +0000 Subject: [PATCH 13/26] wip datatypes test case additions --- tests/test_postgres_datatypes.py 
| 807 ++++++++++++++++++++++++------- 1 file changed, 621 insertions(+), 186 deletions(-) diff --git a/tests/test_postgres_datatypes.py b/tests/test_postgres_datatypes.py index 45d7660..84f0cc3 100644 --- a/tests/test_postgres_datatypes.py +++ b/tests/test_postgres_datatypes.py @@ -21,6 +21,7 @@ test_table_name = "postgres_datatypes_test" test_db = "dev" +# TODO manually verify this schema meets our expectations expected_schema = {'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, 'OUR TIME': {'type': ['null', 'string']}, 'OUR TIME TZ': {'type': ['null', 'string']}, @@ -126,10 +127,12 @@ class PostgresDatatypes(unittest.TestCase): Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the decimal point when precision is explicitly stated, maximum is 1000 digits TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN + [x] Generate 3 different fields with NUMERIC, + [] NUMERIC(precision, scale), + [x] NUMERIC(precision). + [x] Cover Maximum precision and scale + [x] Cover Minimum precision and scale + [x] Cover NaN Floating-Point Types @@ -138,67 +141,39 @@ class PostgresDatatypes(unittest.TestCase): - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. TODOs - - Cover NaN, -Inf, Inf - - + [x] Cover NaN, -Inf, Inf + [x] Zero Character - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Cover the following character sets: - LATIN1 ISO 8859-1, ECMA 94 Western European Yes 1 ISO88591 - LATIN2 ISO 8859-2, ECMA 94 Central European Yes 1 ISO88592 - LATIN3 ISO 8859-3, ECMA 94 South European Yes 1 ISO88593 - LATIN4 ISO 8859-4, ECMA 94 North European Yes 1 ISO88594 - LATIN5 ISO 8859-9, ECMA 128 Turkish Yes 1 ISO88599 - LATIN6 ISO 8859-10, ECMA 144 Nordic Yes 1 ISO885910 - LATIN7 ISO 8859-13 Baltic Yes 1 ISO885913 - LATIN8 ISO 8859-14 Celtic Yes 1 ISO885914 - LATIN9 ISO 8859-15 LATIN1 with Euro and accents Yes 1 ISO885915 - LATIN10 ISO 8859-16, ASRO SR 14111 Romanian Yes 1 ISO885916 - UTF8 Unicode, 8-bit all Yes 1-4 Unicode - - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape + [x] Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) + [x] VARCHAR(10485760) + [] Generate a 1 GB string?? 
-- Not Possbile, but we can approach 20 MB + [] text - cover one of every character that is allowed + [] UTF8 Unicode, 8-bit all Yes 1-4 Unicode + [] LATIN1 ISO 8859-1, ECMA 94 Western European Yes 1 ISO88591 + [] LATIN2 ISO 8859-2, ECMA 94 Central European Yes 1 ISO88592 + [] LATIN3 ISO 8859-3, ECMA 94 South European Yes 1 ISO88593 + [] LATIN4 ISO 8859-4, ECMA 94 North European Yes 1 ISO88594 + [] LATIN5 ISO 8859-9, ECMA 128 Turkish Yes 1 ISO88599 + [] LATIN6 ISO 8859-10, ECMA 144 Nordic Yes 1 ISO885910 + [] LATIN7 ISO 8859-13 Baltic Yes 1 ISO885913 + [] LATIN8 ISO 8859-14 Celtic Yes 1 ISO885914 + [] LATIN9 ISO 8859-15 LATIN1 with Euro and accents Yes 1 ISO885915 + [] LATIN10 ISO 8859-16, ASRO SR 14111 Romanian Yes 1 ISO885916 + [] investigate if we need to change COLLATION in order to accomplish ALL POSSIBLE CHARACTERS Network Address Types TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Test all precisions 0..6 - - UUID - TODOs - - uuid.uuid1(node=None, clock_seq=None) - Generate a UUID from a host ID, sequence number, and the current time. If node is not given, getnode() is used to obtain the hardware address. If clock_seq is given, it is used as the sequence number; otherwise a random 14-bit sequence number is chosen. - - - uuid.uuid3(namespace, name) - Generate a UUID based on the MD5 hash of a namespace identifier (which is a UUID) and a name (which is a string). + [x] min and max for cider/inet 000's fff's + [x] ipv6 and ipv4 - - uuid.uuid4() - Generate a random UUID. - - uuid.uuid5(namespace, name) - Generate a UUID based on the SHA-1 hash of a namespace identifier (which is a UUID) and a name (which is a string). + Datetimes + TODOs + [x] Test all precisions 0..6 fractional seconds """ @@ -292,6 +267,8 @@ def setUp(self): our_alignment_enum ALIGNMENT, our_money money, our_bigserial BIGSERIAL, + our_serial SERIAL, + our_smallserial SMALLSERIAL, unsupported_bit BIT(80), unsupported_bit_varying BIT VARYING(80), unsupported_box BOX, @@ -304,8 +281,6 @@ def setUp(self): unsupported_pg_lsn PG_LSN, unsupported_point POINT, unsupported_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, unsupported_tsquery TSQUERY, unsupported_tsvector TSVECTOR, unsupported_txid_snapshot TXID_SNAPSHOT, @@ -320,8 +295,10 @@ def setUp(self): self.inserted_records = [] self.expected_records = dict() - + # TODO test out of bounds precision for DECIMAL + # insert a record wtih minimum values + test_case = 'minimum_boundary_general' our_tz = pytz.timezone('Singapore') # GMT+8 min_date = datetime.date(1, 1, 1) my_absurdly_small_decimal = decimal.Decimal('-' + '9' * 38 + '.' 
+ '9' * 38) # THIS IS OUR LIMIT IN THE TARGET} @@ -339,7 +316,7 @@ def setUp(self): 'our_varchar_big': "", # VARCHAR(10485760), 'our_char_big': "a", # CHAR(10485760), 'our_text': "", # TEXT - 'our_text_2': "", # TEXT, TODO move our_ascii into it's own record + 'our_text_2': "", # TEXT, 'our_integer': -2147483648, # INTEGER, 'our_smallint': -32768, # SMALLINT, 'our_bigint': -9223372036854775808, # BIGINT, @@ -352,7 +329,7 @@ def setUp(self): quote_ident('OUR TIME', cur): '00:00:00.000001', # TIME WITHOUT TIME ZONE, quote_ident('OUR TIME TZ', cur): '00:00:00.000001-15:59', # TIME WITH TIME ZONE, quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': -1.79769313486231e+308, # DOUBLE PRECISION + 'our_double': decimal.Decimal('-1.79769313486231e+308'), # DOUBLE PRECISION 'our_real': decimal.Decimal('-3.40282e+38'), # REAL, 'our_boolean': False, # BOOLEAN, 'our_bit': '0', # BIT(1), @@ -361,19 +338,18 @@ def setUp(self): 'our_uuid': '00000000-0000-0000-0000-000000000000', # str(uuid.uuid1()) 'our_hstore': None, # HSTORE, 'our_citext': "", # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, + 'our_cidr': '00.000.000.000/32', # cidr, + 'our_inet': '00.000.000.000', # inet, + 'our_mac': '00:00:00:00:00:00', ## macaddr 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error + 'our_money': '-$92,233,720,368,547,758.08', # money, 'our_bigserial': 1, # BIGSERIAL, 'our_serial': 1, # SERIAL, 'our_smallserial': 1, # SMALLSERIAL, }) - self.expected_records['minimum_boundary_general'] = copy.deepcopy(self.inserted_records[-1]) - self.expected_records['minimum_boundary_general'].update({ + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update({ 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_double': decimal.Decimal('-1.79769313486231e+308'), 'OUR TS': '0001-01-01T00:00:00.000001+00:00', 'OUR TS TZ': '0001-01-01T15:59:00.000001+00:00', 'OUR TIME': '00:00:00.000001', @@ -381,27 +357,23 @@ def setUp(self): 'OUR DATE': '0001-01-01T00:00:00+00:00', 'our_bit': False, 'our_jsonb': json.loads(self.inserted_records[-1]['our_jsonb']), - 'our_inet': '12.244.233.165', + 'our_cidr': '0.0.0.0/32', + 'our_inet': '0.0.0.0', + 'our_mac': '00:00:00:00:00:00', }) - my_keys = set(self.expected_records['minimum_boundary_general'].keys()) + my_keys = set(self.expected_records[test_case].keys()) for key in my_keys: if key.startswith('"'): - del self.expected_records['minimum_boundary_general'][key] - + del self.expected_records[test_case][key] db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) # insert a record wtih maximum values + test_case = 'maximum_boundary_general' max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" max_date = datetime.date(9999, 12, 31) base_string = "Bread Sticks From Olive Garden" my_absurdly_large_decimal = decimal.Decimal('9' * 38 + '.' 
+ '9' * 38) # THIS IS OUR LIMIT IN THE TARGET} - # 🥖 = 1f956 self.inserted_records.append({ 'id': 2147483647, # SERIAL PRIMARY KEY, 'our_char': "🥖", # CHAR, @@ -420,11 +392,10 @@ def setUp(self): quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, quote_ident('OUR TS TZ', cur): '9999-12-31T08:00:59.999999-15:59', #max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? quote_ident('OUR TIME TZ', cur): '23:59:59.999999+1559', # TIME WITH TIME ZONE, quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': decimal.Decimal('9.99999999999999'), # '1E308', # DOUBLE PRECISION, - 'our_real': decimal.Decimal('9.99999'), # '1E308', # REAL, # TODO + 'our_double': decimal.Decimal('1.79769313486231e+308'), # DOUBLE PRECISION, + 'our_real': decimal.Decimal('3.40282e+38'), # '1E308', # REAL, 'our_boolean': True, # BOOLEAN 'our_bit': '1', # BIT(1), 'our_json': json.dumps({ @@ -438,7 +409,7 @@ def setUp(self): 'our_json_boolean': True, 'our_json_null': None, }, - 'our_json_array': ['our_json_arrary_string', 6, {'calm': 'down'}, False, None], + 'our_json_array': ['our_json_arrary_string', 6, {'calm': 'down'}, False, None, ['apples', 6]], 'our_json_boolean': True, 'our_json_null': None, }), # JSON, @@ -453,25 +424,24 @@ def setUp(self): 'our_jsonb_boolean': True, 'our_jsonb_null': None, }, - 'our_jsonb_array': ['our_jsonb_arrary_string', 6, {'calm': 'down'}, False, None], + 'our_jsonb_array': ['our_jsonb_arrary_string', 6, {'calm': 'down'}, False, None, ['apples', 6]], 'our_jsonb_boolean': True, 'our_jsonb_null': None, }), # JSONB, 'our_uuid':'ffffffff-ffff-ffff-ffff-ffffffffffff', # UUID, 'our_hstore': '"foo"=>"bar","bar"=>"foo","dumdum"=>Null', # HSTORE, 'our_citext': "aPpLeS", # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/24',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr + 'our_cidr': '199.199.199.128/32', # # cidr, + 'our_inet': '199.199.199.128', # inet, + 'our_mac': 'ff:ff:ff:ff:ff:ff', # macaddr 'our_alignment_enum': 'u g l y', # ALIGNMENT, 'our_money': "$92,233,720,368,547,758.07", # money, 'our_bigserial': 9223372036854775807, # BIGSERIAL, 'our_serial': 2147483647, # SERIAL, 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, }) - - self.expected_records['maximum_boundary_general'] = copy.deepcopy(self.inserted_records[-1]) - self.expected_records['maximum_boundary_general'].update({ + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update({ 'OUR TS': '9999-12-31T23:59:59.999999+00:00', 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', 'OUR TIME': '23:59:59.999999', @@ -479,21 +449,24 @@ def setUp(self): 'OUR DATE': '9999-12-31T00:00:00+00:00', 'our_char_big': "🥖" + " " * 10485759, 'our_bit': True, - 'our_cidr': '2001:db8::ff00:42:7879/128', 'our_jsonb': json.loads(self.inserted_records[-1]['our_jsonb']), 'our_hstore': {'foo': 'bar', 'bar': 'foo', 'dumdum': None}, }) - my_keys = set(self.expected_records['maximum_boundary_general'].keys()) + my_keys = set(self.expected_records[test_case].keys()) for key in my_keys: if key.startswith('"'): - del self.expected_records['maximum_boundary_general'][key] - + del 
self.expected_records[test_case][key] db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) # insert a record with valid values for unsupported types + test_case = 'unsupported_types' + our_serial = 9999 self.inserted_records.append({ - 'id': 9999, + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, 'unsupported_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), 'unsupported_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), 'unsupported_box': '((50, 50), (0, 0))', # BOX, @@ -511,36 +484,488 @@ def setUp(self): 'unsupported_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, 'unsupported_xml': 'bar', # XML) }) - self.expected_records['unsupported_types'] = { - 'id': 9999, + self.expected_records[test_case] = { + 'id': self.inserted_records[-1]['id'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], } + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # TODO investigate how large our Nulls actually are ie. varchar how big? + # Don't need to be exact but we should get a rough idea of how large the record is. + # There is slight overhead in the record so it would be just undwer 20 megs. + # add a record with a text value that approaches the Stitch linmit ~ 20 Megabytes + # text ~ 6.36 megabytes why can't we get any larger? + test_case = 'maximum_boundary_text' + our_serial = 6 + single_record_limit = int((1024 * 1024 * 6.35) / 4 ) # 6.36 fails + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'our_text': single_record_limit * "🥖", # ~ 6 MB + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + # TODO | BUG_1 | We do not maintain -Infinity, Infinity, and NaN for + # floating-point or arbitrary-precision values + # add a record with -Inf for floating point types + test_case = 'negative_infinity_floats' + our_serial = 7 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'our_double': '-Inf', + 'our_real': '-Inf', + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update({ + 'our_double': None, # BUG_1 + 'our_real': None, # BUG_1 + }) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - # add a record with a text value ~ 10 Megabytes + # add a record with Inf for floating point types + test_case = 'positive_infinity_floats' + our_serial = 8 self.inserted_records.append({ - 'id': 666, - 'our_text': dfr.read_in('text') + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'our_double': 'Inf', + 'our_real': 'Inf', }) - self.expected_records['maximum_boundary_text'] = { - 'id': self.inserted_records[-1]['id'], - 
'our_text': self.inserted_records[-1]['our_text'], - } + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update({ + 'our_double': None, # BUG_1 + 'our_real': None, # BUG_1 + }) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # add a record with NaN for floating point types + test_case = 'not_a_number_floats' + our_serial = 9 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'our_double': 'NaN', + 'our_real': 'NaN', + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update({ + 'our_double': None, # BUG_1 + 'our_real': None, # BUG_1 + }) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # add a record with NaN for arbitrary precision types + test_case = 'not_a_number_numeric' + our_serial = 10 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'our_numeric': 'NaN', + 'our_decimal': 'NaN', + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update({ + 'our_numeric': None, # BUG_1 + 'our_decimal': None, # BUG_1 + }) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # add a record with cidr/inet having IPV6 addresses + test_case = 'ipv6_cidr_inet' + our_serial = 11 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'our_cidr': 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128', + 'our_inet': 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff', + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # add a record with datetimes having 1 second precision + test_case = '0_digits_of_precision_datetimes' + our_serial = 12 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + quote_ident('OUR TS', cur): '1996-12-23T19:05:00', + quote_ident('OUR TS TZ', cur): '1996-12-23T19:05:00+00:00', + quote_ident('OUR TIME', cur): '19:05:00', + quote_ident('OUR TIME TZ', cur): '19:05:00+00:00', + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + self.expected_records[test_case].update({ + 'OUR TS': '1996-12-23T19:05:00+00:00', + 'OUR TS TZ': '1996-12-23T19:05:00+00:00', + 'OUR TIME': '19:05:00', + 'OUR TIME TZ': '19:05:00+00:00', + }) + my_keys = set(self.expected_records[test_case].keys()) + for key in my_keys: + if key.startswith('"'): + del self.expected_records[test_case][key] + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # TODO | BUG_2 | We do not preserve datetime precision. 
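        #          (for instance, Python's own formatting shows the same padding:
        #          datetime.time(19, 5, 0, 100000).isoformat() returns '19:05:00.100000',
        #          i.e. one supplied fractional digit comes back as six)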
+ # If a record has a decimal value it is padded to 6 digits of precision. + # This is not the expected behavior. + + + # add a record with datetimes having .1 second precision + test_case = '1_digits_of_precision_datetimes' + our_serial = 13 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + quote_ident('OUR TS', cur): '1996-12-23T19:05:00.1', + quote_ident('OUR TS TZ', cur): '1996-12-23T19:05:00.1+00:00', + quote_ident('OUR TIME', cur): '19:05:00.1', + quote_ident('OUR TIME TZ', cur): '19:05:00.1+00:00', + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + self.expected_records[test_case].update({ + 'OUR TS': '1996-12-23T19:05:00.100000+00:00', # '1996-12-23T19:05:00.1+00:00', # BUG_2 + 'OUR TS TZ': '1996-12-23T19:05:00.100000+00:00', # '1996-12-23T19:05:00.1+00:00', # BUG_2 + 'OUR TIME': '19:05:00.100000', # '19:05:00.1', # BUG_2 + 'OUR TIME TZ': '19:05:00.100000+00:00', # '19:05:00.1+00:00', # BUG_2 + }) + my_keys = set(self.expected_records[test_case].keys()) + for key in my_keys: + if key.startswith('"'): + del self.expected_records[test_case][key] + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # add a record with datetimes having .01 second precision + test_case = '2_digits_of_precision_datetimes' + our_serial = 14 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + quote_ident('OUR TS', cur): '1996-12-23T19:05:00.12', + quote_ident('OUR TS TZ', cur): '1996-12-23T19:05:00.12+00:00', + quote_ident('OUR TIME', cur): '19:05:00.12', + quote_ident('OUR TIME TZ', cur): '19:05:00.12+00:00', + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + self.expected_records[test_case].update({ + 'OUR TS': '1996-12-23T19:05:00.120000+00:00', # '1996-12-23T19:05:00.12+00:00', # BUG_2 + 'OUR TS TZ': '1996-12-23T19:05:00.120000+00:00', # '1996-12-23T19:05:00.12+00:00', # BUG_2 + 'OUR TIME': '19:05:00.120000', # '19:05:00.12', # BUG_2 + 'OUR TIME TZ': '19:05:00.120000+00:00', # '19:05:00.12+00:00', # BUG_2 + }) + my_keys = set(self.expected_records[test_case].keys()) + for key in my_keys: + if key.startswith('"'): + del self.expected_records[test_case][key] + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # add a record with datetimes having .001 second precision + test_case = '3_digits_of_precision_datetimes' + our_serial = 15 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + quote_ident('OUR TS', cur): '1996-12-23T19:05:00.123', + quote_ident('OUR TS TZ', cur): '1996-12-23T19:05:00.123+00:00', + quote_ident('OUR TIME', cur): '19:05:00.123', + quote_ident('OUR TIME TZ', cur): '19:05:00.123+00:00', + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + self.expected_records[test_case].update({ + 'OUR TS': '1996-12-23T19:05:00.123000+00:00', # '1996-12-23T19:05:00.123+00:00', # BUG_2 + 'OUR TS TZ': '1996-12-23T19:05:00.123000+00:00', # '1996-12-23T19:05:00.123+00:00', # 
BUG_2 + 'OUR TIME': '19:05:00.123000', # '19:05:00.123', # BUG_2 + 'OUR TIME TZ': '19:05:00.123000+00:00', # '19:05:00.123+00:00', # BUG_2 + }) + my_keys = set(self.expected_records[test_case].keys()) + for key in my_keys: + if key.startswith('"'): + del self.expected_records[test_case][key] + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # add a record with datetimes having .0001 secondprecision + test_case = '4_digits_of_precision_datetimes' + our_serial = 16 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + quote_ident('OUR TS', cur): '1996-12-23T19:05:00.1234', + quote_ident('OUR TS TZ', cur): '1996-12-23T19:05:00.1234+00:00', + quote_ident('OUR TIME', cur): '19:05:00.1234', + quote_ident('OUR TIME TZ', cur): '19:05:00.1234+00:00', + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + self.expected_records[test_case].update({ + 'OUR TS': '1996-12-23T19:05:00.123400+00:00', # '1996-12-23T19:05:00.1234+00:00', # BUG_2 + 'OUR TS TZ': '1996-12-23T19:05:00.123400+00:00', # '1996-12-23T19:05:00.1234+00:00', # BUG_2 + 'OUR TIME': '19:05:00.123400', # '19:05:00.1234', # BUG_2 + 'OUR TIME TZ': '19:05:00.123400+00:00', # '19:05:00.1234+00:00', # BUG_2 + }) + my_keys = set(self.expected_records[test_case].keys()) + for key in my_keys: + if key.startswith('"'): + del self.expected_records[test_case][key] + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # add a record with datetimes having .00001 second precision + test_case = '5_digits_of_precision_datetimes' + our_serial = 17 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + quote_ident('OUR TS', cur): '1996-12-23T19:05:00.12345', + quote_ident('OUR TS TZ', cur): '1996-12-23T19:05:00.12345+00:00', + quote_ident('OUR TIME', cur): '19:05:00.12345', + quote_ident('OUR TIME TZ', cur): '19:05:00.12345+00:00', + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + self.expected_records[test_case].update({ + 'OUR TS': '1996-12-23T19:05:00.123450+00:00', # '1996-12-23T19:05:00.12345+00:00', # BUG_2 + 'OUR TS TZ': '1996-12-23T19:05:00.123450+00:00', # '1996-12-23T19:05:00.12345+00:00', # BUG_2 + 'OUR TIME': '19:05:00.123450', # '19:05:00.12345', # BUG_2 + 'OUR TIME TZ': '19:05:00.123450+00:00', # '19:05:00.12345+00:00', # BUG_2 + }) + my_keys = set(self.expected_records[test_case].keys()) + for key in my_keys: + if key.startswith('"'): + del self.expected_records[test_case][key] + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + # add a record with datetimes having .000001 second precision + test_case = '6_digits_of_precision_datetimes' + our_serial = 18 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + quote_ident('OUR TS', cur): '1996-12-23T19:05:00.123456', + quote_ident('OUR TS TZ', cur): '1996-12-23T19:05:00.123456+00:00', + quote_ident('OUR TIME', cur): '19:05:00.123456', + quote_ident('OUR TIME TZ', cur): '19:05:00.123456+00:00', + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + 
self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + self.expected_records[test_case].update({ + 'OUR TS': '1996-12-23T19:05:00.123456+00:00', + 'OUR TS TZ': '1996-12-23T19:05:00.123456+00:00', + 'OUR TIME': '19:05:00.123456', + 'OUR TIME TZ': '19:05:00.123456+00:00', + }) + my_keys = set(self.expected_records[test_case].keys()) + for key in my_keys: + if key.startswith('"'): + del self.expected_records[test_case][key] db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - # # 🥖 = 1f956 - # self.inserted_records.append({ - # 'id': 2147483647, # SERIAL PRIMARY KEY, - # 'our_char': "🥖", # CHAR, - # 'our_varchar': "a" * 20971520 # VARCHAR, - # 'our_varchar_big': "🥖" * 5242880base_string, # VARCHAR(10485760), - # 'our_char_big': "🥖", # CHAR(10485760), - # add a record with a text value ~ 10 Megabytes + # TODO | BUG_3 | floating-point precisions can't handle expected + # negative value nearest zero boundary + + # add a record with a negative value nearest zero for double and real + test_case = 'near_zero_negative_floats' + our_serial = 19 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'our_double': decimal.Decimal('-2.22507385850720e-308'), # -2.2250738585072014e-308, # BUG_3 + 'our_real': decimal.Decimal('-1.17549E-38'), #-1.175494351e-38 BUG_3 + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # TODO | BUG_4 | floating-point precisions can't handle expected + # positive value nearest zero boundary + + + # add a record with a positive value nearest zero for double and real + test_case = 'near_zero_positive_floats' + our_serial = 20 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'our_double': decimal.Decimal('2.22507385850720e-308'), # 2.2250738585072014e-308 BUG_4 + 'our_real': decimal.Decimal('1.17549e-38'), # 1.175494351e-38 BUG_4 + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + # add a record with the value 0 for double and real + test_case = 'zero_floats' + our_serial = 21 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'our_double': 0, + 'our_real': 0, # REAL, + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # add a record with an hstore that has spaces, commas, arrows, quotes, and escapes + test_case = 'special_characters_hstore' + our_serial = 22 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + # psycopg2 does not let us insert: ' "backslash" => "\\", "double_quote" => "\"" ' + 'our_hstore': ' "spaces" => " b a r ", "commas" => "f,o,o,", "arrow" => "=>", ' + }) + self.expected_records[test_case] = 
copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + self.expected_records[test_case].update({ + 'our_hstore': { + "spaces": " b a r ", + "commas": "f,o,o,", + "arrow": "=>", + # "double_quote": "\"", + # "backslash": "\\" + } + }) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # add a record with Null for every field + test_case = 'null_for_all_fields_possible' + our_serial = 23 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'our_char': None, + 'our_varchar': None, + 'our_varchar_big': None, + 'our_char_big': None, + 'our_text': None, + 'our_text_2': None, + 'our_integer': None, + 'our_smallint': None, + 'our_bigint': None, + 'our_nospec_numeric': None, + 'our_numeric': None, + 'our_nospec_decimal': None, + 'our_decimal': None, + quote_ident('OUR TS', cur): None, + quote_ident('OUR TS TZ', cur): None, + quote_ident('OUR TIME', cur): None, + quote_ident('OUR TIME TZ', cur): None, + quote_ident('OUR DATE', cur): None, + 'our_double': None, + 'our_real': None, + 'our_boolean': None, + 'our_bit': None, + 'our_json': None, + 'our_jsonb': None, + 'our_uuid': None, + 'our_hstore': None, + 'our_citext': None, + 'our_cidr': None, + 'our_inet': None, + 'our_mac': None, + 'our_alignment_enum': None, + 'our_money': None, + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update({ + 'OUR TS': None, + 'OUR TS TZ': None, + 'OUR TIME': None, + 'OUR TIME TZ': None, + 'OUR DATE': None, + }) + my_keys = set(self.expected_records[test_case].keys()) + for key in my_keys: + if key.startswith('"'): + del self.expected_records[test_case][key] + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + # TODO MANUAL TESTING + # EXCEED THE PYTHON LIMITATIONS FOR + # [] datetimes + # [] hstore + # psycopg2 does not let us insert escaped characters: + # try this manually: ' "backslash" => "\\", "double_quote" => "\"" ' + + + def null_out_remaining_fields(self, inserted_record): + all_fields = self.expected_fields() + unsupported_fields = self.expected_unsupported_fields() + set_fields = set(inserted_record.keys()) + + remaining_fields = all_fields.difference(set_fields).difference(unsupported_fields) + remaining_valid_fields_to_null = {field: None for field in remaining_fields} + + return remaining_valid_fields_to_null + @staticmethod def expected_check_streams(): return { 'postgres_datatypes_test'} @@ -560,78 +985,67 @@ def expected_primary_keys(): 'postgres_datatypes_test' : {'id'} } - @staticmethod - def expected_unsupported_fields(): - return { - 'unsupported_bigserial', - 'unsupported_bit_varying', - 'unsupported_box', - 'unsupported_bytea', - 'unsupported_circle', - 'unsupported_interval', - 'unsupported_line', - 'unsupported_lseg', - 'unsupported_path', - 'unsupported_pg_lsn', - 'unsupported_point', - 'unsupported_polygon', - 'unsupported_serial', - 'unsupported_smallserial', - 'unsupported_tsquery', - 'unsupported_tsvector', - 'unsupported_txid_snapshot', - 'unsupported_xml', - } + def expected_unsupported_fields(self): + expected_fields = self.expected_fields() + return {field + for field in expected_fields + if field.startswith("unsupported")} @staticmethod - def expected_schema_types(): + def expected_fields(): return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 
'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_hstore': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'unsupported_bigserial': 'bigint', - 'unsupported_bit_varying': 'bit varying', - 'unsupported_box': 'box', - 'unsupported_bytea': 'bytea', - 'unsupported_circle': 'circle', - 'unsupported_interval': 'interval', - 'unsupported_line': 'line', - 'unsupported_lseg': 'lseg', - 'unsupported_path': 'path', - 'unsupported_pg_lsn': 'pg_lsn', - 'unsupported_point': 'point', - 'unsupported_polygon': 'polygon', - 'unsupported_serial': 'integer', - 'unsupported_smallserial': 'smallint', - 'unsupported_tsquery': 'tsquery', - 'unsupported_tsvector': 'tsvector', - 'unsupported_txid_snapshot': 'txid_snapshot', - 'unsupported_xml': 'xml', + 'id', + 'our_varchar', # VARCHAR, + 'our_varchar_big', # VARCHAR(10485760), + 'our_char', # CHAR, + 'our_char_big', # CHAR(10485760), + 'our_text', # TEXT, + 'our_text_2', # TEXT, + 'our_integer', # INTEGER, + 'our_smallint', # SMALLINT, + 'our_bigint', # BIGINT, + 'our_nospec_numeric', # NUMERIC, + 'our_numeric', # NUMERIC(1000, 500), + 'our_nospec_decimal', # DECIMAL, + 'our_decimal', # DECIMAL(1000, 500), + 'OUR TS', # TIMESTAMP WITHOUT TIME ZONE, + 'OUR TS TZ', # TIMESTAMP WITH TIME ZONE, + 'OUR TIME', # TIME WITHOUT TIME ZONE, + 'OUR TIME TZ', # TIME WITH TIME ZONE, + 'OUR DATE', # DATE, + 'our_double', # DOUBLE PRECISION, + 'our_real', # REAL, + 'our_boolean', # BOOLEAN, + 'our_bit', # BIT(1), + 'our_json', # JSON, + 'our_jsonb', # JSONB, + 'our_uuid', # UUID, + 'our_hstore', # HSTORE, + 'our_citext', # CITEXT, + 'our_cidr', # cidr, + 'our_inet', # inet, + 'our_mac', # macaddr, + 'our_alignment_enum', # ALIGNMENT, + 'our_money', # money, + 'our_bigserial', # BIGSERIAL, + 'our_serial', # SERIAL, + 'our_smallserial', # SMALLSERIAL, + 'unsupported_bit', # BIT(80), + 'unsupported_bit_varying', # BIT VARYING(80), + 'unsupported_box', # BOX, + 'unsupported_bytea', # BYTEA, + 'unsupported_circle', # CIRCLE, + 'unsupported_interval', # INTERVAL, + 'unsupported_line', # LINE, + 'unsupported_lseg', # LSEG, + 'unsupported_path', # PATH, + 'unsupported_pg_lsn', # PG_LSN, + 'unsupported_point', # POINT, + 'unsupported_polygon', # POLYGON, + 'unsupported_tsquery', # TSQUERY, + 'unsupported_tsvector', # TSVECTOR, + 'unsupported_txid_snapshot', # TXID_SNAPSHOT, + 'unsupported_xml', # XML } @staticmethod @@ -762,25 +1176,46 @@ def datatypes_test(self, conn_id): for test_case, message in zip(self.expected_records.keys(), messages[1:]): with self.subTest(test_case=test_case): + if test_case == 'maximum_boundary_text': + import pdb; pdb.set_trace() + # grab our expected record expected_record = self.expected_records[test_case] # Verify replicated records match our expectations - for field in expected_record.keys(): + for field in self.expected_fields(): with self.subTest(field=field): + # unsupported fields should not be present 
in expected or actual records + if field.startswith("unsupported"): + + expected_field_value = expected_record.get(field, "FIELD MISSING AS EXPECTED") + actual_field_value = message['data'].get(field, "FIELD MISSING AS EXPECTED") + + self.assertEqual(expected_field_value, actual_field_value) + + # some data types require adjustments to actual values to make valid comparison... - if field == 'our_jsonb': - expected_field_value = expected_record.get(field, '{"MISSING": "FIELD"}') - actual_field_value = json.loads(message['data'].get(field, '{"MISSING": "FIELD"}')) + elif field == 'our_jsonb': + + expected_field_value = expected_record.get(field, '{"MISSING": "EXPECTED FIELD"}') + actual_field_value = message['data'].get(field, '{"MISSING": "ACTUAL FIELD"}') + + if actual_field_value is None: + + self.assertIsNone(expected_field_value) + + else: + + actual_field_value = json.loads(actual_field_value) + self.assertDictEqual(expected_field_value, actual_field_value) - self.assertDictEqual(expected_field_value, actual_field_value) # but most type do not else: - expected_field_value = expected_record.get(field, "MISSING FIELD") - actual_field_value = message['data'].get(field, "MISSING FIELD") + expected_field_value = expected_record.get(field, "MISSING EXPECTED FIELD") + actual_field_value = message['data'].get(field, "MISSING ACTUAL FIELD") self.assertEqual(expected_field_value, actual_field_value) From a26a1d5a082b58ffc1ba952381d7216dc4377f28 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Mon, 5 Apr 2021 15:08:34 +0000 Subject: [PATCH 14/26] added test case all utf-8 encodable unicode characters --- .circleci/config.yml | 2 +- setup.py | 2 +- tests/datatype_file_reader.py | 1 + tests/test_postgres_datatypes.py | 102 +++++++++++++++++++++++++++---- 4 files changed, 94 insertions(+), 13 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index af6415d..9c72070 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -22,7 +22,7 @@ jobs: pyenv local 3.5.2 python3 -m venv /usr/local/share/virtualenvs/tap-postgres source /usr/local/share/virtualenvs/tap-postgres/bin/activate - pip install -U 'pip<19.2' 'setuptools<51.0.0' + pip install pip setuptool pip install .[dev] source dev_env.sh make test diff --git a/setup.py b/setup.py index 36fd3c1..024830d 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ classifiers=['Programming Language :: Python :: 3 :: Only'], install_requires=[ 'singer-python==5.3.1', - 'psycopg2==2.7.4', + 'psycopg2==2.8.4', 'strict-rfc3339==0.7', ], extras_require={ diff --git a/tests/datatype_file_reader.py b/tests/datatype_file_reader.py index bc1f460..756e557 100644 --- a/tests/datatype_file_reader.py +++ b/tests/datatype_file_reader.py @@ -9,6 +9,7 @@ datatype_to_file = { "text": "text_datatype.txt", + "text": "ascii_text.txt", } def _go_to_tests_directory(): diff --git a/tests/test_postgres_datatypes.py b/tests/test_postgres_datatypes.py index 84f0cc3..0c7f22b 100644 --- a/tests/test_postgres_datatypes.py +++ b/tests/test_postgres_datatypes.py @@ -3,6 +3,7 @@ import copy import unittest import decimal +from decimal import Decimal import uuid import json @@ -61,7 +62,7 @@ 'exclusiveMinimum': True, 'maximum': 100000000000000000000000000000000000000000000000000000000000000, 'minimum': -100000000000000000000000000000000000000000000000000000000000000, - 'multipleOf': "Decimal('1E-38')", + 'multipleOf': Decimal('1E-38'), 'type': ['null', 'number']}, 'our_double': {'type': ['null', 'number']}, 'our_inet': {'type': ['null', 'string']}, @@ -76,19 +77,19 
@@ 'exclusiveMinimum': True, 'maximum': 100000000000000000000000000000000000000000000000000000000000000, 'minimum': -100000000000000000000000000000000000000000000000000000000000000, - 'multipleOf': "Decimal('1E-38')", + 'multipleOf': Decimal('1E-38'), 'type': ['null', 'number']}, 'our_nospec_numeric': {'exclusiveMaximum': True, 'exclusiveMinimum': True, 'maximum': 100000000000000000000000000000000000000000000000000000000000000, 'minimum': -100000000000000000000000000000000000000000000000000000000000000, - 'multipleOf': "Decimal('1E-38')", + 'multipleOf': Decimal('1E-38'), 'type': ['null', 'number']}, 'our_numeric': {'exclusiveMaximum': True, 'exclusiveMinimum': True, 'maximum': 100000000000000000000000000000000000000000000000000000000000000, 'minimum': -100000000000000000000000000000000000000000000000000000000000000, - 'multipleOf': "Decimal('1E-38')", + 'multipleOf': Decimal('1E-38'), 'type': ['null', 'number']}, 'our_real': {'type': ['null', 'number']}, 'our_serial': {'maximum': 2147483647, @@ -295,7 +296,7 @@ def setUp(self): self.inserted_records = [] self.expected_records = dict() - # TODO test out of bounds precision for DECIMAL + # insert a record wtih minimum values test_case = 'minimum_boundary_general' @@ -497,8 +498,10 @@ def setUp(self): # TODO investigate how large our Nulls actually are ie. varchar how big? # Don't need to be exact but we should get a rough idea of how large the record is. # There is slight overhead in the record so it would be just undwer 20 megs. - # add a record with a text value that approaches the Stitch linmit ~ 20 Megabytes # text ~ 6.36 megabytes why can't we get any larger? + + + # add a record with a text value that approaches the Stitch linmit ~ 20 Megabytes test_case = 'maximum_boundary_text' our_serial = 6 single_record_limit = int((1024 * 1024 * 6.35) / 4 ) # 6.36 fails @@ -516,6 +519,8 @@ def setUp(self): # TODO | BUG_1 | We do not maintain -Infinity, Infinity, and NaN for # floating-point or arbitrary-precision values + + # add a record with -Inf for floating point types test_case = 'negative_infinity_floats' our_serial = 7 @@ -948,13 +953,89 @@ def setUp(self): del self.expected_records[test_case][key] db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + # TODO BUG_5 | The target prevents us from sending this record. + # The expectation was that values with higher precision than + # allowed, would be rounded and handled. + + + # add a record with out-of-bounds precision for DECIMAL/NUMERIC + # test_case = 'out_of_bounds_precision_decimal_and_numeric' + # our_serial = 24 + # our_precision_too_high_decimal = decimal.Decimal('12345.' + '6' * 39) + # self.inserted_records.append({ + # 'id': our_serial, + # 'our_bigserial': our_serial, + # 'our_serial': our_serial, + # 'our_smallserial': our_serial, + # 'our_decimal': our_precision_too_high_decimal, + # 'our_nospec_decimal': our_precision_too_high_decimal, + # 'our_numeric': our_precision_too_high_decimal, + # 'our_nospec_numeric': our_precision_too_high_decimal, + # }) + # self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + # self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + # self.expected_records[test_case].update({ + # 'our_decimal': decimal.Decimal('12345.' + '6' * 37 + '7'), + # 'our_nospec_decimal': decimal.Decimal('12345.' + '6' * 37 + '7'), + # 'our_numeric': decimal.Decimal('12345.' + '6' * 37 + '7'), + # 'our_nospec_numeric': decimal.Decimal('12345.' 
+ '6' * 37 + '7'), + # }) + # db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # add a record with all extended ascii characters + test_case = 'all_ascii_text' + our_serial = 26 + our_ascii = ''.join(chr(x) for x in range(128) if chr(x) != '\x00') + our_extended_ascii = ''.join(chr(x) for x in range(256) if chr(x) != '\x00') + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'our_text': our_ascii, + 'our_text_2': our_extended_ascii, + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # add a record with all unicode characters + test_case = 'all_unicode_text' + our_serial = 27 + our_unicode = '' + for x in range(1114112): + if x == 0: # skip 'null' because "ValueError: A string literal cannot contain NUL (0x00) characters." + continue + + unicode_char = chr(x) + try: + _ = unicode_char.encode() + except UnicodeEncodeError: # there are a range of unicode chars that cannot be utf-8 encoded + continue + + our_unicode += unicode_char + + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'our_text': our_unicode, + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + # TODO MANUAL TESTING # EXCEED THE PYTHON LIMITATIONS FOR # [] datetimes # [] hstore # psycopg2 does not let us insert escaped characters: # try this manually: ' "backslash" => "\\", "double_quote" => "\"" ' - + # [] null text ie. 
'\x00', we can't input with psycopg2 def null_out_remaining_fields(self, inserted_record): all_fields = self.expected_fields() @@ -1155,7 +1236,8 @@ def datatypes_test(self, conn_id): messages = records_by_stream[test_table_name]['messages'] # verify the persisted schema matches expectations TODO NEED TO GO TRHOUGH SCHEMA MANUALLY STILL - # self.assertEqual(expected_schema, records_by_stream[test_table_name]['schema']) + actual_schema = records_by_stream[test_table_name]['schema']['properties'] + self.assertEqual(expected_schema, actual_schema) # verify the number of records and number of messages match our expectations expected_record_count = len(self.expected_records) @@ -1176,12 +1258,10 @@ def datatypes_test(self, conn_id): for test_case, message in zip(self.expected_records.keys(), messages[1:]): with self.subTest(test_case=test_case): - if test_case == 'maximum_boundary_text': - import pdb; pdb.set_trace() - # grab our expected record expected_record = self.expected_records[test_case] + # Verify replicated records match our expectations for field in self.expected_fields(): with self.subTest(field=field): From 032a7663337a460c177ad6ea06a8cd84fb94d22e Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Mon, 5 Apr 2021 15:11:16 +0000 Subject: [PATCH 15/26] fix setuptools in circle config --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9c72070..58529bf 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -22,7 +22,7 @@ jobs: pyenv local 3.5.2 python3 -m venv /usr/local/share/virtualenvs/tap-postgres source /usr/local/share/virtualenvs/tap-postgres/bin/activate - pip install pip setuptool + pip install pip setuptools pip install .[dev] source dev_env.sh make test From 547188dc40af25dd3ff1680ee8b42cfbfdb56be6 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Mon, 5 Apr 2021 15:13:39 +0000 Subject: [PATCH 16/26] put psycopg2 back to 2.7.4 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 024830d..36fd3c1 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ classifiers=['Programming Language :: Python :: 3 :: Only'], install_requires=[ 'singer-python==5.3.1', - 'psycopg2==2.8.4', + 'psycopg2==2.7.4', 'strict-rfc3339==0.7', ], extras_require={ From 067b0ab283fd834a09227a884bf35b5173675091 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Tue, 6 Apr 2021 21:05:58 +0000 Subject: [PATCH 17/26] finished test cases, cleanup --- tests/test_postgres_datatypes.py | 609 +++++++++++++++---------------- 1 file changed, 300 insertions(+), 309 deletions(-) diff --git a/tests/test_postgres_datatypes.py b/tests/test_postgres_datatypes.py index 0c7f22b..d2a293b 100644 --- a/tests/test_postgres_datatypes.py +++ b/tests/test_postgres_datatypes.py @@ -1,4 +1,5 @@ import os +import sys import datetime import copy import unittest @@ -22,10 +23,10 @@ test_table_name = "postgres_datatypes_test" test_db = "dev" -# TODO manually verify this schema meets our expectations +# TODO_1 | Why are we chaging datatypes for time to string? 
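A minimal sketch of the string mapping questioned above, using values taken from this
test's own expected records (the dict name below is illustrative only, not part of the test):

example_time_values = {
    'OUR TIME': '23:59:59.999999',           # TIME WITHOUT TIME ZONE arrives as a plain string
    'OUR TIME TZ': '23:59:59.999999+00:00',  # TIME WITH TIME ZONE, offset normalized to UTC
}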
expected_schema = {'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, + 'OUR TIME': {'type': ['null', 'string']}, # TODO_1 + 'OUR TIME TZ': {'type': ['null', 'string']}, # TODO_1 'OUR TS': {'format': 'date-time', 'type': ['null', 'string']}, 'OUR TS TZ': {'format': 'date-time', 'type': ['null', 'string']}, 'id': {'maximum': 2147483647, 'minimum': -2147483648, 'type': ['integer']}, @@ -56,23 +57,23 @@ 'our_boolean': {'type': ['null', 'boolean']}, 'our_char': {'maxLength': 1, 'type': ['null', 'string']}, 'our_char_big': {'maxLength': 10485760, 'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, + 'our_cidr': {'type': ['null', 'string']}, # TODO_1 'our_citext': {'type': ['null', 'string']}, 'our_decimal': {'exclusiveMaximum': True, 'exclusiveMinimum': True, - 'maximum': 100000000000000000000000000000000000000000000000000000000000000, + 'maximum': 100000000000000000000000000000000000000000000000000000000000000, # 62 'minimum': -100000000000000000000000000000000000000000000000000000000000000, 'multipleOf': Decimal('1E-38'), 'type': ['null', 'number']}, 'our_double': {'type': ['null', 'number']}, - 'our_inet': {'type': ['null', 'string']}, + 'our_inet': {'type': ['null', 'string']}, # TODO_1 'our_integer': {'maximum': 2147483647, 'minimum': -2147483648, 'type': ['null', 'integer']}, - 'our_json': {'type': ['null', 'string']}, # TODO Should this have a format?? - 'our_jsonb': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}, + 'our_json': {'type': ['null', 'string']}, # TODO_1 object ? see hstore + 'our_jsonb': {'type': ['null', 'string']}, # TODO_1 object ? + 'our_mac': {'type': ['null', 'string']}, # TODO_1 + 'our_money': {'type': ['null', 'string']}, # TODO_1 decimal(n, 2) ? 'our_nospec_decimal': {'exclusiveMaximum': True, 'exclusiveMinimum': True, 'maximum': 100000000000000000000000000000000000000000000000000000000000000, @@ -104,79 +105,15 @@ 'our_hstore': {'properties': {}, 'type': ['null', 'object']}, 'our_text': {'type': ['null', 'string']}, 'our_text_2': {'type': ['null', 'string']}, - 'our_uuid': {'type': ['null', 'string']}, + 'our_uuid': {'type': ['null', 'string']}, # TODO_1 'our_varchar': {'type': ['null', 'string']}, 'our_varchar_big': {'maxLength': 10485760, 'type': ['null', 'string']}} decimal.getcontext().prec = 131072 + 16383 -whitespace = ' \t\n\r\v\f' -ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz' -ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' -ascii_letters = ascii_lowercase + ascii_uppercase -digits = '0123456789' -punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" -our_ascii = ascii_letters + digits + punctuation + whitespace class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - [x] Generate 3 different fields with NUMERIC, - [] NUMERIC(precision, scale), - [x] NUMERIC(precision). 
- [x] Cover Maximum precision and scale - [x] Cover Minimum precision and scale - [x] Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - [x] Cover NaN, -Inf, Inf - [x] Zero - - - Character - TODOS - [x] Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - [x] VARCHAR(10485760) - [] Generate a 1 GB string?? -- Not Possbile, but we can approach 20 MB - [] text - cover one of every character that is allowed - [] UTF8 Unicode, 8-bit all Yes 1-4 Unicode - [] LATIN1 ISO 8859-1, ECMA 94 Western European Yes 1 ISO88591 - [] LATIN2 ISO 8859-2, ECMA 94 Central European Yes 1 ISO88592 - [] LATIN3 ISO 8859-3, ECMA 94 South European Yes 1 ISO88593 - [] LATIN4 ISO 8859-4, ECMA 94 North European Yes 1 ISO88594 - [] LATIN5 ISO 8859-9, ECMA 128 Turkish Yes 1 ISO88599 - [] LATIN6 ISO 8859-10, ECMA 144 Nordic Yes 1 ISO885910 - [] LATIN7 ISO 8859-13 Baltic Yes 1 ISO885913 - [] LATIN8 ISO 8859-14 Celtic Yes 1 ISO885914 - [] LATIN9 ISO 8859-15 LATIN1 with Euro and accents Yes 1 ISO885915 - [] LATIN10 ISO 8859-16, ASRO SR 14111 Romanian Yes 1 ISO885916 - [] investigate if we need to change COLLATION in order to accomplish ALL POSSIBLE CHARACTERS - - - Network Address Types - TODOs - [x] min and max for cider/inet 000's fff's - [x] ipv6 and ipv4 - - - Datetimes - TODOs - [x] Test all precisions 0..6 fractional seconds - - """ AUTOMATIC_FIELDS = "automatic" REPLICATION_KEYS = "valid-replication-keys" @@ -231,7 +168,7 @@ def setUp(self): canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - db_utils.set_db_time_zone(cur, '+15:59') #'America/New_York') + db_utils.set_db_time_zone(cur, '+15:59') create_table_sql = """ CREATE TABLE {} (id SERIAL PRIMARY KEY, @@ -297,56 +234,55 @@ def setUp(self): self.expected_records = dict() + # TODO | BUG_0 | The target blows up with greater than 38 digits before/after the decimal. + # Is this a known/expected behavior or a BUG in the target? + # It prevents us from testing what the tap claims to be able to support + # (100 precision, 38 scale) without rounding AND..The postgres limits WITH rounding. + # insert a record wtih minimum values test_case = 'minimum_boundary_general' - our_tz = pytz.timezone('Singapore') # GMT+8 min_date = datetime.date(1, 1, 1) - my_absurdly_small_decimal = decimal.Decimal('-' + '9' * 38 + '.' + '9' * 38) # THIS IS OUR LIMIT IN THE TARGET} - # TODO | BUG ? | The target blows up with greater than 38 digits before/after the decimal. - # Is this a known/expected behavior or a BUG in the target? - # It prevents us from testing what the tap claims to be able to support (100 precision, 38 scale) without rounding AND.. - # The postgres limits WITH rounding. - # my_absurdly_small_decimal = decimal.Decimal('-' + '9' * 38 + '.' + '9' * 38) # THIS IS OUR LIMIT IN THE TARGET - # my_absurdly_small_decimal = decimal.Decimal('-' + '9' * 62 + '.' + '9' * 37) # 131072 + 16383 - # my_absurdly_small_spec_decimal = decimal.Decimal('-' + '9'*500 + '.' + '9'*500) + my_absurdly_small_decimal = decimal.Decimal('-' + '9' * 38 + '.' 
+ '9' * 38) # CURRENT LIMIT IN TARGET + # my_absurdly_small_decimal = decimal.Decimal('-' + '9' * 62 + '.' + '9' * 37) # 131072 + 16383 BUG_0 + # my_absurdly_small_spec_decimal = decimal.Decimal('-' + '9'*500 + '.' + '9'*500) # BUG_0 self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': "", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_nospec_numeric': my_absurdly_small_decimal, # NUMERIC, - 'our_numeric': my_absurdly_small_decimal, # NUMERIC(1000, 500), - 'our_nospec_decimal': my_absurdly_small_decimal, # DECIMAL, - 'our_decimal': my_absurdly_small_decimal, # DECIMAL(1000, 500), - quote_ident('OUR TS', cur): '0001-01-01T00:00:00.000001', # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): '0001-01-01T00:00:00.000001-15:59',#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00.000001', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00.000001-15:59', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': decimal.Decimal('-1.79769313486231e+308'), # DOUBLE PRECISION - 'our_real': decimal.Decimal('-3.40282e+38'), # REAL, - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': json.dumps(dict()), # JSON, - 'our_jsonb': json.dumps(dict()), # JSONB, - 'our_uuid': '00000000-0000-0000-0000-000000000000', # str(uuid.uuid1()) - 'our_hstore': None, # HSTORE, - 'our_citext': "", # CITEXT, - 'our_cidr': '00.000.000.000/32', # cidr, - 'our_inet': '00.000.000.000', # inet, - 'our_mac': '00:00:00:00:00:00', ## macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, - 'our_bigserial': 1, # BIGSERIAL, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, + 'id': 1, + 'our_char': "a", + 'our_varchar': "", + 'our_varchar_big': "", + 'our_char_big': "a", + 'our_text': "", + 'our_text_2': "", + 'our_integer': -2147483648, + 'our_smallint': -32768, + 'our_bigint': -9223372036854775808, + 'our_nospec_numeric': my_absurdly_small_decimal, + 'our_numeric': my_absurdly_small_decimal, + 'our_nospec_decimal': my_absurdly_small_decimal, + 'our_decimal': my_absurdly_small_decimal, + quote_ident('OUR TS', cur): '0001-01-01T00:00:00.000001', + quote_ident('OUR TS TZ', cur): '0001-01-01T00:00:00.000001-15:59', + quote_ident('OUR TIME', cur): '00:00:00.000001', + quote_ident('OUR TIME TZ', cur): '00:00:00.000001-15:59', + quote_ident('OUR DATE', cur): min_date,# + 'our_double': decimal.Decimal('-1.79769313486231e+308'), + 'our_real': decimal.Decimal('-3.40282e+38'), + 'our_boolean': False, + 'our_bit': '0', + 'our_json': json.dumps(dict()), + 'our_jsonb': json.dumps(dict()), + 'our_uuid': '00000000-0000-0000-0000-000000000000', + 'our_hstore': None, + 'our_citext': "", + 'our_cidr': '00.000.000.000/32', + 'our_inet': '00.000.000.000', + 'our_mac': '00:00:00:00:00:00', + 'our_alignment_enum': None, + 'our_money': '-$92,233,720,368,547,758.08', + 'our_bigserial': 1, + 'our_serial': 1, + 'our_smallserial': 1, }) self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) self.expected_records[test_case].update({ @@ -363,154 +299,26 @@ def setUp(self): 'our_mac': 
'00:00:00:00:00:00', }) my_keys = set(self.expected_records[test_case].keys()) - for key in my_keys: + for key in my_keys: # we need overwrite expectations for fields with spaces if key.startswith('"'): del self.expected_records[test_case][key] db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - # insert a record wtih maximum values - test_case = 'maximum_boundary_general' - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - max_date = datetime.date(9999, 12, 31) - base_string = "Bread Sticks From Olive Garden" - my_absurdly_large_decimal = decimal.Decimal('9' * 38 + '.' + '9' * 38) # THIS IS OUR LIMIT IN THE TARGET} - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': "🥖", # CHAR, - 'our_varchar': "a", #* 20971520, # VARCHAR, - 'our_varchar_big': "🥖" + base_string, # VARCHAR(10485714), - 'our_char_big': "🥖", # CHAR(10485760), - 'our_text': "apples", #dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_nospec_numeric': my_absurdly_large_decimal, # NUMERIC, - 'our_numeric': my_absurdly_large_decimal, # NUMERIC(1000, 500), - 'our_nospec_decimal': my_absurdly_large_decimal, # DECIMAL, - 'our_decimal': my_absurdly_large_decimal, # NUMERIC(1000, 500), - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): '9999-12-31T08:00:59.999999-15:59', #max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '23:59:59.999999+1559', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': decimal.Decimal('1.79769313486231e+308'), # DOUBLE PRECISION, - 'our_real': decimal.Decimal('3.40282e+38'), # '1E308', # REAL, - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': json.dumps({ - 'our_json_string': 'This is our JSON string type.', - 'our_json_number': 666, - 'our_json_object': { - 'our_json_string': 'This is our JSON string type.', - 'our_json_number': 666, - 'our_json_object': {'calm': 'down'}, - 'our_json_array': ['our_json_arrary_string', 6, {'calm': 'down'}, False, None], - 'our_json_boolean': True, - 'our_json_null': None, - }, - 'our_json_array': ['our_json_arrary_string', 6, {'calm': 'down'}, False, None, ['apples', 6]], - 'our_json_boolean': True, - 'our_json_null': None, - }), # JSON, - 'our_jsonb': json.dumps({ - 'our_jsonb_string': 'This is our JSONB string type.', - 'our_jsonb_number': 666, - 'our_jsonb_object': { - 'our_jsonb_string': 'This is our JSONB string type.', - 'our_jsonb_number': 666, - 'our_jsonb_object': {'calm': 'down'}, - 'our_jsonb_array': ['our_jsonb_arrary_string', 6, {'calm': 'down'}, False, None], - 'our_jsonb_boolean': True, - 'our_jsonb_null': None, - }, - 'our_jsonb_array': ['our_jsonb_arrary_string', 6, {'calm': 'down'}, False, None, ['apples', 6]], - 'our_jsonb_boolean': True, - 'our_jsonb_null': None, - }), # JSONB, - 'our_uuid':'ffffffff-ffff-ffff-ffff-ffffffffffff', # UUID, - 'our_hstore': '"foo"=>"bar","bar"=>"foo","dumdum"=>Null', # HSTORE, - 'our_citext': "aPpLeS", # CITEXT, - 'our_cidr': '199.199.199.128/32', # # cidr, - 'our_inet': '199.199.199.128', # inet, - 'our_mac': 'ff:ff:ff:ff:ff:ff', # macaddr - 'our_alignment_enum': 'u g l y', # 
ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - }) - self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) - self.expected_records[test_case].update({ - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+15:59', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_char_big': "🥖" + " " * 10485759, - 'our_bit': True, - 'our_jsonb': json.loads(self.inserted_records[-1]['our_jsonb']), - 'our_hstore': {'foo': 'bar', 'bar': 'foo', 'dumdum': None}, - }) - my_keys = set(self.expected_records[test_case].keys()) - for key in my_keys: - if key.startswith('"'): - del self.expected_records[test_case][key] - db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - - - # insert a record with valid values for unsupported types - test_case = 'unsupported_types' - our_serial = 9999 - self.inserted_records.append({ - 'id': our_serial, - 'our_bigserial': our_serial, - 'our_serial': our_serial, - 'our_smallserial': our_serial, - 'unsupported_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'unsupported_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'unsupported_box': '((50, 50), (0, 0))', # BOX, - 'unsupported_bytea': "E'\\255'", # BYTEA, - 'unsupported_circle': '< (3, 1), 4 >', # CIRCLE, - 'unsupported_interval': '178000000 years', # INTERVAL, - 'unsupported_line': '{6, 6, 6}', # LINE, - 'unsupported_lseg': '(0 , 45), (45, 90)', # LSEG, - 'unsupported_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'unsupported_pg_lsn': '16/B374D848', # PG_LSN, - 'unsupported_point': '(1, 2)', # POINT, - 'unsupported_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'unsupported_tsquery': "'fat' & 'rat'", # TSQUERY, - 'unsupported_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'unsupported_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'unsupported_xml': 'bar', # XML) - }) - self.expected_records[test_case] = { - 'id': self.inserted_records[-1]['id'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - } - self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) - db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - - - # TODO investigate how large our Nulls actually are ie. varchar how big? - # Don't need to be exact but we should get a rough idea of how large the record is. - # There is slight overhead in the record so it would be just undwer 20 megs. - # text ~ 6.36 megabytes why can't we get any larger? 
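For reference, the arithmetic behind these size limits: the deleted value topped out near 6.35 MB because each 🥖 character costs four bytes in UTF-8, while the replacement below uses single-byte 'a' characters, so 19,990,000 of them approach the ~20 MB record limit referenced in the comments (how the target actually accounts for that limit is the open BUG_6 question). A minimal sketch of the comparison, assuming the relevant measure is the UTF-8 byte length of the value; ONE_MIB and utf8_mib are illustrative names, and the two character counts are the ones used by the test cases here.

ONE_MIB = 1024 * 1024

def utf8_mib(value):
    """Size of a string encoded as UTF-8, in MiB."""
    return len(value.encode('utf-8')) / ONE_MIB

ascii_chars = 19990000                    # value used by 'maximum_boundary_text' below
emoji_chars = int((ONE_MIB * 6.35) / 4)   # the deleted 🥖-based value above

print(utf8_mib('a' * ascii_chars))   # ~19.06 MiB: one byte per character
print(utf8_mib('🥖' * emoji_chars))  # ~6.35 MiB: four bytes per character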
+ # BUG_6 | https://stitchdata.atlassian.net/browse/SRCE-5205 + # target uses binary notation for storage => 20 MB != 20 * (2^20) # add a record with a text value that approaches the Stitch linmit ~ 20 Megabytes test_case = 'maximum_boundary_text' - our_serial = 6 - single_record_limit = int((1024 * 1024 * 6.35) / 4 ) # 6.36 fails + our_serial = 2 + single_record_limit = 19990000 # 20*(1024*1024) # BUG_6 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, 'our_serial': our_serial, 'our_smallserial': our_serial, - 'our_text': single_record_limit * "🥖", # ~ 6 MB + 'our_text': single_record_limit * "a", }) self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) @@ -523,7 +331,7 @@ def setUp(self): # add a record with -Inf for floating point types test_case = 'negative_infinity_floats' - our_serial = 7 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -543,7 +351,7 @@ def setUp(self): # add a record with Inf for floating point types test_case = 'positive_infinity_floats' - our_serial = 8 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -563,7 +371,7 @@ def setUp(self): # add a record with NaN for floating point types test_case = 'not_a_number_floats' - our_serial = 9 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -583,7 +391,7 @@ def setUp(self): # add a record with NaN for arbitrary precision types test_case = 'not_a_number_numeric' - our_serial = 10 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -603,7 +411,7 @@ def setUp(self): # add a record with cidr/inet having IPV6 addresses test_case = 'ipv6_cidr_inet' - our_serial = 11 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -619,7 +427,7 @@ def setUp(self): # add a record with datetimes having 1 second precision test_case = '0_digits_of_precision_datetimes' - our_serial = 12 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -652,7 +460,7 @@ def setUp(self): # add a record with datetimes having .1 second precision test_case = '1_digits_of_precision_datetimes' - our_serial = 13 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -680,7 +488,7 @@ def setUp(self): # add a record with datetimes having .01 second precision test_case = '2_digits_of_precision_datetimes' - our_serial = 14 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -706,9 +514,9 @@ def setUp(self): db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - # add a record with datetimes having .001 second precision + # add a record with datetimes having .001 second (millisecond) precision test_case = '3_digits_of_precision_datetimes' - our_serial = 15 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -736,7 +544,7 @@ def setUp(self): # add a record with datetimes having .0001 secondprecision test_case = '4_digits_of_precision_datetimes' - our_serial = 16 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -764,7 +572,7 @@ def setUp(self): # add a record with datetimes having .00001 second precision test_case = 
'5_digits_of_precision_datetimes' - our_serial = 17 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -790,9 +598,9 @@ def setUp(self): db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - # add a record with datetimes having .000001 second precision + # add a record with datetimes having .000001 second (microsecond) precision test_case = '6_digits_of_precision_datetimes' - our_serial = 18 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -821,9 +629,10 @@ def setUp(self): # TODO | BUG_3 | floating-point precisions can't handle expected # negative value nearest zero boundary + # add a record with a negative value nearest zero for double and real test_case = 'near_zero_negative_floats' - our_serial = 19 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -843,7 +652,7 @@ def setUp(self): # add a record with a positive value nearest zero for double and real test_case = 'near_zero_positive_floats' - our_serial = 20 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -859,14 +668,14 @@ def setUp(self): # add a record with the value 0 for double and real test_case = 'zero_floats' - our_serial = 21 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, 'our_serial': our_serial, 'our_smallserial': our_serial, 'our_double': 0, - 'our_real': 0, # REAL, + 'our_real': 0, }) self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) @@ -875,14 +684,13 @@ def setUp(self): # add a record with an hstore that has spaces, commas, arrows, quotes, and escapes test_case = 'special_characters_hstore' - our_serial = 22 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, 'our_serial': our_serial, 'our_smallserial': our_serial, - # psycopg2 does not let us insert: ' "backslash" => "\\", "double_quote" => "\"" ' - 'our_hstore': ' "spaces" => " b a r ", "commas" => "f,o,o,", "arrow" => "=>", ' + 'our_hstore': r' "spaces" => " b a r ", "commas" => "f,o,o,", "arrow" => "=>", "backslash" => "\\", "double_quote" => "\"" ' }) self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) @@ -891,8 +699,8 @@ def setUp(self): "spaces": " b a r ", "commas": "f,o,o,", "arrow": "=>", - # "double_quote": "\"", - # "backslash": "\\" + "double_quote": "\"", + "backslash": "\\" } }) db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) @@ -900,7 +708,7 @@ def setUp(self): # add a record with Null for every field test_case = 'null_for_all_fields_possible' - our_serial = 23 + our_serial += 1 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -954,14 +762,15 @@ def setUp(self): db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - # TODO BUG_5 | The target prevents us from sending this record. - # The expectation was that values with higher precision than - # allowed, would be rounded and handled. + # TODO BUG_5 | The target prevents us from sending a record with numeric/decimal + # values that are out of the max precision of 6 decimal digits. 
+ # The expectation is that values with higher precision than the allowed + # limit, would be rounded and handled. # add a record with out-of-bounds precision for DECIMAL/NUMERIC # test_case = 'out_of_bounds_precision_decimal_and_numeric' - # our_serial = 24 + # our_serial += 1 # our_precision_too_high_decimal = decimal.Decimal('12345.' + '6' * 39) # self.inserted_records.append({ # 'id': our_serial, @@ -986,7 +795,7 @@ def setUp(self): # add a record with all extended ascii characters test_case = 'all_ascii_text' - our_serial = 26 + our_serial += 1 our_ascii = ''.join(chr(x) for x in range(128) if chr(x) != '\x00') our_extended_ascii = ''.join(chr(x) for x in range(256) if chr(x) != '\x00') self.inserted_records.append({ @@ -1004,38 +813,186 @@ def setUp(self): # add a record with all unicode characters test_case = 'all_unicode_text' - our_serial = 27 + our_serial += 1 our_unicode = '' - for x in range(1114112): - if x == 0: # skip 'null' because "ValueError: A string literal cannot contain NUL (0x00) characters." - continue - - unicode_char = chr(x) - try: - _ = unicode_char.encode() - except UnicodeEncodeError: # there are a range of unicode chars that cannot be utf-8 encoded - continue + chars = list(range(1, 55296)) # skip 0 because 'null' is not supported + chars.extend(range(57344, sys.maxunicode + 1)) + for x in chars: + our_unicode += chr(x) + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'our_text': our_unicode, + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - our_unicode += unicode_char + # add a record with a non-specified varchar value that approaches the Stitch linmit ~ 20 Megabytes + test_case = 'maximum_boundary_varchar' + our_serial += 1 + single_record_limit = 19990000 # 20*(1024*1024) # BUG_6 self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, 'our_serial': our_serial, 'our_smallserial': our_serial, - 'our_text': our_unicode, + 'our_varchar': single_record_limit * "a", }) self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - # TODO MANUAL TESTING + + # insert a record with valid values for unsupported types + test_case = 'unsupported_types' + our_serial = 9999 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + 'unsupported_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), + 'unsupported_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), + 'unsupported_box': '((50, 50), (0, 0))', # BOX, + 'unsupported_bytea': "E'\\255'", # BYTEA, + 'unsupported_circle': '< (3, 1), 4 >', # CIRCLE, + 'unsupported_interval': '178000000 years', # INTERVAL, + 'unsupported_line': '{6, 6, 6}', # LINE, + 'unsupported_lseg': '(0 , 45), (45, 90)', # LSEG, + 'unsupported_path': '((0, 0), (45, 90), (2, 56))', # PATH, + 'unsupported_pg_lsn': '16/B374D848', # PG_LSN, + 'unsupported_point': '(1, 2)', # POINT, + 'unsupported_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 
7))', # POLYGON, + 'unsupported_tsquery': "'fat' & 'rat'", # TSQUERY, + 'unsupported_tsvector': "'fat':2 'rat':3", # TSVECTOR, + 'unsupported_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, + 'unsupported_xml': 'bar', # XML) + }) + self.expected_records[test_case] = { + 'id': self.inserted_records[-1]['id'], + 'our_bigserial': self.inserted_records[-1]['our_bigserial'], + 'our_serial': self.inserted_records[-1]['our_serial'], + 'our_smallserial': self.inserted_records[-1]['our_smallserial'], + } + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # insert a record wtih maximum values + test_case = 'maximum_boundary_general' + max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + max_date = datetime.date(9999, 12, 31) + base_string = "Bread Sticks From Olive Garden 🥖" + my_absurdly_large_decimal = decimal.Decimal('9' * 38 + '.' + '9' * 38) # THIS IS OUR LIMIT IN THE TARGET} + self.inserted_records.append({ + 'id': 2147483647, # SERIAL PRIMARY KEY, + 'our_char': "🥖", # CHAR, + 'our_varchar': "a", # * 20971520, # VARCHAR + 'our_varchar_big': "🥖" + base_string, # VARCHAR(10485714), + 'our_char_big': "🥖", # CHAR(10485760), + 'our_text': "apples", #dfr.read_in("text"), # TEXT, + 'our_text_2': None, # TEXT, + 'our_integer': 2147483647, # INTEGER, + 'our_smallint': 32767, # SMALLINT, + 'our_bigint': 9223372036854775807, # BIGINT, + 'our_nospec_numeric': my_absurdly_large_decimal, # NUMERIC, + 'our_numeric': my_absurdly_large_decimal, # NUMERIC(1000, 500), + 'our_nospec_decimal': my_absurdly_large_decimal, # DECIMAL, + 'our_decimal': my_absurdly_large_decimal, # NUMERIC(1000, 500), + quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, + quote_ident('OUR TS TZ', cur): '9999-12-31T08:00:59.999999-15:59', #max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, + quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, + quote_ident('OUR TIME TZ', cur): '23:59:59.999999+1559', # TIME WITH TIME ZONE, + quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, + 'our_double': decimal.Decimal('1.79769313486231e+308'), # DOUBLE PRECISION, + 'our_real': decimal.Decimal('3.40282e+38'), # '1E308', # REAL, + 'our_boolean': True, # BOOLEAN + 'our_bit': '1', # BIT(1), + 'our_json': json.dumps({ + 'our_json_string': 'This is our JSON string type.', + 'our_json_number': 666, + 'our_json_object': { + 'our_json_string': 'This is our JSON string type.', + 'our_json_number': 666, + 'our_json_object': {'calm': 'down'}, + 'our_json_array': ['our_json_arrary_string', 6, {'calm': 'down'}, False, None], + 'our_json_boolean': True, + 'our_json_null': None, + }, + 'our_json_array': ['our_json_arrary_string', 6, {'calm': 'down'}, False, None, ['apples', 6]], + 'our_json_boolean': True, + 'our_json_null': None, + }), # JSON, + 'our_jsonb': json.dumps({ + 'our_jsonb_string': 'This is our JSONB string type.', + 'our_jsonb_number': 666, + 'our_jsonb_object': { + 'our_jsonb_string': 'This is our JSONB string type.', + 'our_jsonb_number': 666, + 'our_jsonb_object': {'calm': 'down'}, + 'our_jsonb_array': ['our_jsonb_arrary_string', 6, {'calm': 'down'}, False, None], + 'our_jsonb_boolean': True, + 'our_jsonb_null': None, + }, + 'our_jsonb_array': ['our_jsonb_arrary_string', 6, {'calm': 'down'}, False, None, ['apples', 6]], + 'our_jsonb_boolean': True, + 
'our_jsonb_null': None, + }), # JSONB, + 'our_uuid':'ffffffff-ffff-ffff-ffff-ffffffffffff', # UUID, + 'our_hstore': '"foo"=>"bar","bar"=>"foo","dumdum"=>Null', # HSTORE, + 'our_citext': "aPpLeS", # CITEXT, + 'our_cidr': '199.199.199.128/32', # # cidr, + 'our_inet': '199.199.199.128', # inet, + 'our_mac': 'ff:ff:ff:ff:ff:ff', # macaddr + 'our_alignment_enum': 'u g l y', # ALIGNMENT, + 'our_money': "$92,233,720,368,547,758.07", # money, + 'our_bigserial': 9223372036854775807, # BIGSERIAL, + 'our_serial': 2147483647, # SERIAL, + 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update({ + 'OUR TS': '9999-12-31T23:59:59.999999+00:00', + 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', + 'OUR TIME': '23:59:59.999999', + 'OUR TIME TZ': '23:59:59.999999+15:59', + 'OUR DATE': '9999-12-31T00:00:00+00:00', + 'our_char_big': "🥖" + " " * 10485759, + 'our_bit': True, + 'our_jsonb': json.loads(self.inserted_records[-1]['our_jsonb']), + 'our_hstore': {'foo': 'bar', 'bar': 'foo', 'dumdum': None}, + }) + my_keys = set(self.expected_records[test_case].keys()) + for key in my_keys: + if key.startswith('"'): + del self.expected_records[test_case][key] + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + + # MANUAL TESTING # EXCEED THE PYTHON LIMITATIONS FOR # [] datetimes - # [] hstore - # psycopg2 does not let us insert escaped characters: - # try this manually: ' "backslash" => "\\", "double_quote" => "\"" ' - # [] null text ie. '\x00', we can't input with psycopg2 + + + # FUTURE TEST GOALS + # ERROR MESSAGE TESTING + # [] 'OUR TS': '4713-01-01 00:00:00.000000 BC' TIMESTAMP WITHOUT TIME ZONE, + # [] 'OUR TS TZ': '4713-01-01 00:00:00.000000 BC', TIMESTAMP WITH TIME ZONE, + # [] 'OUR TIME': '00:00:00.000001', TIME WITHOUT TIME ZONE, + # [] 'OUR TIME TZ': '00:00:00.000001-15:59', # TIME WITH TIME ZONE, + # [] 'OUR DATE': '4713-01-01 BC', DATE + # [] 'our_double': decimal.Decimal('-1.79769313486231e+308'), DOUBLE PRECISION + # [] 'our_real': decimal.Decimal('-3.40282e+38'), REAL, + # [] 'OUR TS': '294276-12-31 24:00:00.000000', TIMESTAMP WITHOUT TIME ZONE, + # [] 'OUR TS TZ': '294276-12-31 24:00:00.000000', TIMESTAMP WITH TIME ZONE, + # [] 'OUR TIME': '23:59:59.999999',# '24:00:00.000000' TIME WITHOUT TIME ZONE, + # [] 'OUR TIME TZ': '23:59:59.999999+1559', TIME WITH TIME ZONE, + # [] 'OUR DATE': '5874897-12-31', DATE, + def null_out_remaining_fields(self, inserted_record): all_fields = self.expected_fields() @@ -1049,11 +1006,15 @@ def null_out_remaining_fields(self, inserted_record): @staticmethod def expected_check_streams(): - return { 'postgres_datatypes_test'} + return { + 'postgres_datatypes_test', + } @staticmethod def expected_sync_streams(): - return { 'postgres_datatypes_test'} + return { + 'postgres_datatypes_test', + } def expected_check_stream_ids(self): """A set of expected table names in format""" @@ -1162,6 +1123,39 @@ def get_properties(self, original_properties=True): return return_value + + def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = True): + """Select all streams and all fields within streams or all streams and no fields.""" + + schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + + if self.default_replication_method is self.FULL_TABLE: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} + }] + + elif self.default_replication_method is 
self.INCREMENTAL: + additional_md = [{ + "breadcrumb": [], "metadata": { + "replication-method": self.INCREMENTAL, "replication-key": "OUR TS" + } + }] + + else: + additional_md = [{ + "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} + }] + + non_selected_properties = [] + if not select_all_fields: + # get a list of all properties so that none are selected + non_selected_properties = schema.get('annotated-schema', {}).get( + 'properties', {}).keys() + + connections.select_catalog_and_fields_via_metadata( + conn_id, catalog, schema, additional_md, non_selected_properties) + + def test_run(self): """Parametrized datatypes test running against each replication method.""" @@ -1218,10 +1212,7 @@ def datatypes_test(self, conn_id): # perform table selection print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - # TODO need to enable multiple replication methods (see auto fields test) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : self.default_replication_method}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) + self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=True) # run sync job 1 and verify exit codes sync_job_name = runner.run_sync_mode(self, conn_id) @@ -1235,7 +1226,7 @@ def datatypes_test(self, conn_id): records_by_stream = runner.get_records_from_target_output() messages = records_by_stream[test_table_name]['messages'] - # verify the persisted schema matches expectations TODO NEED TO GO TRHOUGH SCHEMA MANUALLY STILL + # verify the persisted schema matches expectations actual_schema = records_by_stream[test_table_name]['schema']['properties'] self.assertEqual(expected_schema, actual_schema) @@ -1291,7 +1282,7 @@ def datatypes_test(self, conn_id): self.assertDictEqual(expected_field_value, actual_field_value) - # but most type do not + # but most types do not else: expected_field_value = expected_record.get(field, "MISSING EXPECTED FIELD") From 9a4f969077f4797a6ae2b145dfe88b3503d875a5 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Wed, 7 Apr 2021 12:33:08 +0000 Subject: [PATCH 18/26] pin pip again --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 58529bf..af6415d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -22,7 +22,7 @@ jobs: pyenv local 3.5.2 python3 -m venv /usr/local/share/virtualenvs/tap-postgres source /usr/local/share/virtualenvs/tap-postgres/bin/activate - pip install pip setuptools + pip install -U 'pip<19.2' 'setuptools<51.0.0' pip install .[dev] source dev_env.sh make test From 09d5a7c05810f2d7622bcad9caa40e617e7c8cd0 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Wed, 7 Apr 2021 12:58:23 +0000 Subject: [PATCH 19/26] pylint fixes and cleanup --- tests/datatype_file_reader.py | 33 - tests/test_postgres_datatypes.py | 112 +- tests/text_datatype.txt | 224384 ---------------------------- 3 files changed, 54 insertions(+), 224475 deletions(-) delete mode 100644 tests/datatype_file_reader.py delete mode 100644 tests/text_datatype.txt diff --git a/tests/datatype_file_reader.py b/tests/datatype_file_reader.py deleted file mode 100644 index 756e557..0000000 --- a/tests/datatype_file_reader.py +++ /dev/null @@ -1,33 +0,0 @@ -import os - -potential_paths = [ - 'tests/', - '../tests/' - 'tap-postgres/tests/', - 
'../tap-postgres/tests/', -] - -datatype_to_file = { - "text": "text_datatype.txt", - "text": "ascii_text.txt", -} - -def _go_to_tests_directory(): - for path in potential_paths: - if os.path.exists(path): - os.chdir(path) - return os.getcwd() - raise NotImplementedError("This reader cannot run from {}".format(os.getcwd())) - - -def read_in(datatype: str = "text"): - print("Acquiring path to tests directory.") - cwd = _go_to_tests_directory() - - filename = datatype_to_file[datatype] - - print("Reading contents of {}.".format(filename)) - with open(cwd + "/" + filename, "r") as data: - contents = data.read() - - return contents diff --git a/tests/test_postgres_datatypes.py b/tests/test_postgres_datatypes.py index d2a293b..6f045ba 100644 --- a/tests/test_postgres_datatypes.py +++ b/tests/test_postgres_datatypes.py @@ -5,19 +5,16 @@ import unittest import decimal from decimal import Decimal -import uuid import json from psycopg2.extensions import quote_ident import psycopg2.extras -import pytz from tap_tester.scenario import (SCENARIOS) import tap_tester.connections as connections import tap_tester.menagerie as menagerie import tap_tester.runner as runner import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error test_schema_name = "public" test_table_name = "postgres_datatypes_test" @@ -818,7 +815,7 @@ def setUp(self): chars = list(range(1, 55296)) # skip 0 because 'null' is not supported chars.extend(range(57344, sys.maxunicode + 1)) for x in chars: - our_unicode += chr(x) + our_unicode += chr(x) self.inserted_records.append({ 'id': our_serial, 'our_bigserial': our_serial, @@ -855,22 +852,22 @@ def setUp(self): 'our_bigserial': our_serial, 'our_serial': our_serial, 'our_smallserial': our_serial, - 'unsupported_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'unsupported_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'unsupported_box': '((50, 50), (0, 0))', # BOX, - 'unsupported_bytea': "E'\\255'", # BYTEA, - 'unsupported_circle': '< (3, 1), 4 >', # CIRCLE, - 'unsupported_interval': '178000000 years', # INTERVAL, - 'unsupported_line': '{6, 6, 6}', # LINE, - 'unsupported_lseg': '(0 , 45), (45, 90)', # LSEG, - 'unsupported_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'unsupported_pg_lsn': '16/B374D848', # PG_LSN, - 'unsupported_point': '(1, 2)', # POINT, - 'unsupported_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'unsupported_tsquery': "'fat' & 'rat'", # TSQUERY, - 'unsupported_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'unsupported_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'unsupported_xml': 'bar', # XML) + 'unsupported_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', + 'unsupported_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', + 'unsupported_box': '((50, 50), (0, 0))', + 'unsupported_bytea': "E'\\255'", + 'unsupported_circle': '< (3, 1), 4 >', + 'unsupported_interval': '178000000 years', + 'unsupported_line': '{6, 6, 6}', + 'unsupported_lseg': '(0 , 45), (45, 90)', + 'unsupported_path': '((0, 0), (45, 90), (2, 56))', + 'unsupported_pg_lsn': '16/B374D848', + 'unsupported_point': '(1, 2)', + 'unsupported_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', + 'unsupported_tsquery': "'fat' & 'rat'", + 'unsupported_tsvector': "'fat':2 'rat':3", + 'unsupported_txid_snapshot': 
'10:20:10,14,15', + 'unsupported_xml': 'bar', }) self.expected_records[test_case] = { 'id': self.inserted_records[-1]['id'], @@ -885,33 +882,32 @@ def setUp(self): # insert a record wtih maximum values test_case = 'maximum_boundary_general' max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - max_date = datetime.date(9999, 12, 31) base_string = "Bread Sticks From Olive Garden 🥖" - my_absurdly_large_decimal = decimal.Decimal('9' * 38 + '.' + '9' * 38) # THIS IS OUR LIMIT IN THE TARGET} + my_absurdly_large_decimal = decimal.Decimal('9' * 38 + '.' + '9' * 38) self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': "🥖", # CHAR, - 'our_varchar': "a", # * 20971520, # VARCHAR - 'our_varchar_big': "🥖" + base_string, # VARCHAR(10485714), - 'our_char_big': "🥖", # CHAR(10485760), - 'our_text': "apples", #dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_nospec_numeric': my_absurdly_large_decimal, # NUMERIC, - 'our_numeric': my_absurdly_large_decimal, # NUMERIC(1000, 500), - 'our_nospec_decimal': my_absurdly_large_decimal, # DECIMAL, - 'our_decimal': my_absurdly_large_decimal, # NUMERIC(1000, 500), - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): '9999-12-31T08:00:59.999999-15:59', #max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '23:59:59.999999+1559', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': decimal.Decimal('1.79769313486231e+308'), # DOUBLE PRECISION, - 'our_real': decimal.Decimal('3.40282e+38'), # '1E308', # REAL, - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), + 'id': 2147483647, + 'our_char': "🥖", + 'our_varchar': "a", + 'our_varchar_big': "🥖" + base_string, + 'our_char_big': "🥖", + 'our_text': "apples", + 'our_text_2': None, + 'our_integer': 2147483647, + 'our_smallint': 32767, + 'our_bigint': 9223372036854775807, + 'our_nospec_numeric': my_absurdly_large_decimal, + 'our_numeric': my_absurdly_large_decimal, + 'our_nospec_decimal': my_absurdly_large_decimal, + 'our_decimal': my_absurdly_large_decimal, + quote_ident('OUR TS', cur): max_ts, + quote_ident('OUR TS TZ', cur): '9999-12-31T08:00:59.999999-15:59', + quote_ident('OUR TIME', cur): '23:59:59.999999', + quote_ident('OUR TIME TZ', cur): '23:59:59.999999+1559', + quote_ident('OUR DATE', cur): '5874897-12-31', + 'our_double': decimal.Decimal('1.79769313486231e+308'), + 'our_real': decimal.Decimal('3.40282e+38'), + 'our_boolean': True, + 'our_bit': '1', 'our_json': json.dumps({ 'our_json_string': 'This is our JSON string type.', 'our_json_number': 666, @@ -926,7 +922,7 @@ def setUp(self): 'our_json_array': ['our_json_arrary_string', 6, {'calm': 'down'}, False, None, ['apples', 6]], 'our_json_boolean': True, 'our_json_null': None, - }), # JSON, + }), 'our_jsonb': json.dumps({ 'our_jsonb_string': 'This is our JSONB string type.', 'our_jsonb_number': 666, @@ -941,18 +937,18 @@ def setUp(self): 'our_jsonb_array': ['our_jsonb_arrary_string', 6, {'calm': 'down'}, False, None, ['apples', 6]], 'our_jsonb_boolean': True, 'our_jsonb_null': None, - }), # JSONB, - 'our_uuid':'ffffffff-ffff-ffff-ffff-ffffffffffff', # UUID, - 'our_hstore': 
'"foo"=>"bar","bar"=>"foo","dumdum"=>Null', # HSTORE, - 'our_citext': "aPpLeS", # CITEXT, - 'our_cidr': '199.199.199.128/32', # # cidr, - 'our_inet': '199.199.199.128', # inet, - 'our_mac': 'ff:ff:ff:ff:ff:ff', # macaddr - 'our_alignment_enum': 'u g l y', # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, + }), + 'our_uuid':'ffffffff-ffff-ffff-ffff-ffffffffffff', + 'our_hstore': '"foo"=>"bar","bar"=>"foo","dumdum"=>Null', + 'our_citext': "aPpLeS", + 'our_cidr': '199.199.199.128/32', + 'our_inet': '199.199.199.128', + 'our_mac': 'ff:ff:ff:ff:ff:ff', + 'our_alignment_enum': 'u g l y', + 'our_money': "$92,233,720,368,547,758.07", + 'our_bigserial': 9223372036854775807, + 'our_serial': 2147483647, + 'our_smallserial': 32767, }) self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) self.expected_records[test_case].update({ diff --git a/tests/text_datatype.txt b/tests/text_datatype.txt deleted file mode 100644 index 2c08d16..0000000 --- a/tests/text_datatype.txt +++ /dev/null @@ -1,224384 +0,0 @@ -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 
'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 
'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : 
self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - 
self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) 
- self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? 
- - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default to 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datetimes - TODOs - - Test values with second, millisecond and microsecond precision - - Boolean - TODOs - - Enter all acceptable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert
fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': 
self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
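-                        # NB | Assumption, not taken from the tap's documentation: Python's
-                        #      datetime.datetime.max is 9999-12-31 23:59:59.999999, so fixtures built
-                        #      from datetime objects cannot reach Postgres's own upper bounds (the
-                        #      commented-out '294276-12-31' values above). The expected record below
-                        #      therefore renders these maximums as '9999-12-31T23:59:59.999999+00:00'.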
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
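# NB | Minimal illustrative sketch of the subTest pattern referenced above (standalone, not part
#      of this test run; the class name and values are invented). Each field gets its own
#      pass/fail entry, so one mismatch in a ~10MB character column does not drown out the
#      report for every other column:
import unittest

class _SubTestPatternSketch(unittest.TestCase):
    def test_field_by_field(self):
        expected = {'id': 1, 'our_char_big': 'a'.ljust(64)}
        actual = {'id': 1, 'our_char_big': 'a'.ljust(64)}
        for field, value in expected.items():
            with self.subTest(field=field):
                self.assertEqual(value, actual.get(field, "MISSING FIELD"))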
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without errors and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the tap_stream_id is in the <database>-<schema>-<table> format for each stream. - - Verify the catalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available.
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
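# NB | Minimal illustrative sketch (hand-written values, not real discovery output): the
#      breadcrumb comprehensions above assume annotated metadata shaped as one top-level entry
#      with breadcrumb [] plus one ['properties', <field>] entry per column, each carrying an
#      'inclusion' of automatic/available/unsupported:
_example_metadata = [
    {'breadcrumb': [], 'metadata': {'row-count': 500, 'is-view': False}},
    {'breadcrumb': ['properties', 'id'], 'metadata': {'inclusion': 'automatic'}},
    {'breadcrumb': ['properties', 'our_varchar'], 'metadata': {'inclusion': 'available'}},
    {'breadcrumb': ['properties', 'invalid_xml'], 'metadata': {'inclusion': 'unsupported'}},
]
_automatic = {item['breadcrumb'][1] for item in _example_metadata
              if item['breadcrumb'] and item['metadata'].get('inclusion') == 'automatic'}
assert _automatic == {'id'}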
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
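# NB | Minimal illustrative sketch (standalone, not part of this test run) of how the UTC value
#      in expected_inserted_record below follows from the fixture above: 02:02:02 in
#      America/New_York falls in EST (UTC-5) on 1997-02-02, so the replicated timestamp reads
#      07:02:02 UTC.
import datetime
import pytz

_nyc = pytz.timezone('America/New_York')
_local = _nyc.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))
assert _local.astimezone(pytz.utc).strftime("%Y-%m-%dT%H:%M:%S.%f+00:00") == \
    '1997-02-02T07:02:02.722184+00:00'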
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
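[Editor's note] The action list above is easier to follow with the resulting ids written out. The sketch below is illustrative only; it assumes the SERIAL ids 1-6 assigned in this test (so an expected_records index is id - 1). Because FULL_TABLE replication keeps no bookmark, the third sync simply re-emits every row that survives these manipulations.

existing_ids = {1, 2, 3}      # rows present before these manipulations
inserted_ids = {4, 5, 6}      # inserted below
updated_ids  = {1, 5}         # updated below (rows remain present)
deleted_ids  = {2, 6}         # deleted below

# FULL_TABLE has no bookmark, so sync 3 re-emits every surviving row.
surviving_ids = sorted((existing_ids | inserted_ids) - deleted_ids)
assert surviving_ids == [1, 3, 4, 5]   # 4 upserts, matching the assertions after sync 3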
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
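[Editor's note] The updates below target mixed-case, space-containing columns such as "OUR TS TZ", which is why identifiers in this suite are built with psycopg2's quote_ident rather than plain string formatting. db_utils.update_record itself is not shown in this patch, so the following is only a hedged sketch of how such an UPDATE could be assembled; build_update_sql is a hypothetical helper, not part of db_utils.

from psycopg2.extensions import quote_ident

def build_update_sql(canon_table_name, updated_data, cur):
    # canon_table_name is already schema-qualified and quoted (see
    # db_utils.canonicalized_table_name above); only the column names need quoting,
    # otherwise Postgres would fold "OUR TS TZ" to a non-existent lower-case name.
    assignments = ", ".join("{} = %s".format(quote_ident(column, cur))
                            for column in updated_data)
    return "UPDATE {} SET {} WHERE id = %s".format(canon_table_name, assignments)

Executing such a statement would pass list(updated_data.values()) + [record_pk] as the parameter tuple.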
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
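[Editor's note] Before the discovery checks, a quick illustration of why the expectations are built with the expected_ts / expected_ts_tz helpers defined above rather than hard-coded offsets: pytz picks the correct EST/EDT offset for each wall-clock value, and converting to UTC before formatting keeps the expected strings stable across DST transitions. Both datetimes below come from this test's fixture data, and the printed strings are exactly what the helpers return.

import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
fmt = "%Y-%m-%dT%H:%M:%S.%f+00:00"

# Standard time: 1987-02-02 02:02:02 in New York is EST (UTC-5).
winter = nyc_tz.localize(datetime.datetime(1987, 2, 2, 2, 2, 2, 722184))
print(winter.astimezone(pytz.utc).strftime(fmt))   # 1987-02-02T07:02:02.722184+00:00

# Daylight time: 2021-04-04 04:04:04 is EDT (UTC-4); used by an update later in this test.
spring = nyc_tz.localize(datetime.datetime(2021, 4, 4, 4, 4, 4, 733184))
print(spring.astimezone(pytz.utc).strftime(fmt))   # 2021-04-04T08:04:04.733184+00:00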
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
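[Editor's note] The two assertLess calls above verify replication-key ordering pairwise. For longer message streams the same check can be written once over all upserts; this is only a sketch reusing the names already in scope in this test (messages, expected_replication_key), not an existing helper. Sorting the strings is chronologically correct here because every value is rendered in UTC with a fixed-width ISO format.

upsert_values = [message['data'][expected_replication_key]
                 for message in messages
                 if message['action'] == 'upsert']
assert upsert_values == sorted(upsert_values), \
    "records are not in ascending order by replication key"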
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
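[Editor's note] The assertions that follow all fall out of one selection rule, sketched below. This is not tap-postgres internals, just a model of the behaviour being verified: an INCREMENTAL sync emits rows whose replication-key value is greater than or equal to the saved bookmark (so the previously bookmarked row is re-emitted), in ascending key order, and hard-deleted rows are simply absent.

def incremental_selection(rows, replication_key, bookmark_value):
    # rows: what is currently in the table, so hard-deleted rows never appear
    due = [row for row in rows
           if bookmark_value is None or row[replication_key] >= bookmark_value]
    # ascending replication-key order, mirroring the ordering assertions below
    return sorted(due, key=lambda row: row[replication_key])

# For this second sync that selects ids 3 (the previously bookmarked row, re-emitted
# because the comparison is inclusive), 1 (updated above the bookmark) and 6 (new,
# highest value); ids 4 and 2 sit below the bookmark and id 5 was deleted.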
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
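[Editor's note] For reference while reading the remaining assertions, this is roughly the shape of the state object menagerie.get_state returns for this stream. The values shown are illustrative placeholders; only the key names matter, and they are the ones asserted below.

example_state = {
    'currently_syncing': None,
    'bookmarks': {
        'dev-public-postgres_incremental_replication_test': {
            'version': 1617000000000,        # placeholder; equals the first sync's table_version
            'replication_key': 'OUR TS TZ',
            'replication_key_value': '2111-01-01T17:12:12.222111+00:00',  # highest key seen so far
            # no 'lsn' entry: WAL positions are only bookmarked for log-based streams
        },
    },
}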
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
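- # (context for the expected-record bookkeeping above and below: 'id' is the SERIAL
- #  primary key Postgres assigns, 2 and then 3 here because setUp already inserted row 1,
- #  and '_sdc_deleted_at' is the metadata column the tap adds to LOG_BASED streams,
- #  None for plain inserts; insert_record() sorts the dict keys and executes a
- #  parameterized statement, roughly
- #    INSERT INTO "postgres_logical_replication_test_cows" ( cow_age, cow_name ) VALUES ( %s, %s )
- #  with values [21, 'betty cow'], letting psycopg2 handle quoting)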
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
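# Illustrative sketch of the bookmark invariant the assertions above repeat
# after every logical-replication sync: the stored LSN may only move forward,
# and the table version never changes. The numeric values are hypothetical.
def assert_lsn_moved_forward(previous_bookmark, current_bookmark, table_version):
    assert current_bookmark['lsn'] >= previous_bookmark['lsn']   # the LSN never rewinds
    assert current_bookmark['version'] == table_version          # logical syncs keep the version

assert_lsn_moved_forward({'lsn': 10000, 'version': 1},
                         {'lsn': 10250, 'version': 1},
                         table_version=1)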
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
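# A condensed view of the value coercions the expected record above asserts
# for the freshly inserted row. The column/value pairs are copied from this
# test's fixture; the dict itself is only illustrative.
source_to_replicated = {
    ('our_store', 'jumps=>"high",name=>"betty"'): {'name': 'betty', 'jumps': 'high'},  # hstore -> object
    ('our_money', '$412.1234'): '$412.12',                  # money keeps two fractional digits
    ('our_inet', '192.168.102.128/32'): '192.168.102.128',  # a /32 host mask is dropped
    ('our_bit', '1'): True,                                  # bit(1) surfaces as a boolean
}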
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
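# Sketch of why the 'NaN' and '+Infinity' values written by the UPDATE above
# are expected back as None in the record that follows: strict JSON has no
# encoding for them, so they cannot travel in a Singer RECORD message
# (assumed rationale, not stated in this test).
import json
import math

try:
    json.dumps({'our_real': math.inf}, allow_nan=False)
except ValueError:
    our_real = None   # matches the expectation for our_decimal / our_real / our_double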
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
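# Sketch, assuming `records_by_stream` as captured in the test above: a
# FULL_TABLE sync of the view bounds its single upsert with two
# activate_version messages, which is the sequence the assertions check.
actions = [m['action'] for m in records_by_stream['chicken_view']['messages']]
assert actions == ['activate_version', 'upsert', 'activate_version']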
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
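# Illustrative aside, assuming the open cursor `cur` from the surrounding
# setUp block: quote_ident (already imported above) and the
# canonicalized_table_name helper double-quote every identifier, which is how
# the view and table names used in these CREATE statements round-trip safely.
assert quote_ident(test_view, cur) == '"chicken_view"'
assert canonicalized_table_name(test_schema_name, test_table_name_1, cur) == \
    '"public"."postgres_views_full_table_replication_test"'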
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
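The comment above recommends the subTest pattern over assertDictEqual because these records contain multi-megabyte string values. As a quick standalone illustration of that pattern (toy data, invented names, not part of the tap-tester harness), a per-field loop reports only the offending field on failure while still checking the rest:

    import unittest

    class SubTestPatternExample(unittest.TestCase):
        def test_field_by_field(self):
            expected = {"id": 1, "our_char_big": "a" * 10, "our_boolean": False}
            actual = {"id": 1, "our_char_big": "a" * 10, "our_boolean": False}
            # One subTest per field: a mismatch prints just that field's
            # expected/actual values instead of dumping both full dicts,
            # and the remaining fields are still evaluated.
            for key in expected:
                with self.subTest(field=key):
                    self.assertEqual(expected[key], actual.get(key, "MISSING FIELD"))

    if __name__ == "__main__":
        unittest.main()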
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the catalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
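# Illustrative sketch only (not part of this patch): the comprehensions above assume
# Singer-style metadata entries of roughly this shape -- one empty-breadcrumb entry
# holding stream-level properties, plus one ["properties", <field>] entry per column.
# Field names and values below are hypothetical examples, not actual tap output.
example_stream_metadata = [
    {"breadcrumb": [],
     "metadata": {"row-count": 500, "is-view": False,
                  "schema-name": "public", "database-name": "discovery1"}},
    {"breadcrumb": ["properties", "id"],
     "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
    {"breadcrumb": ["properties", "our_varchar"],
     "metadata": {"inclusion": "available", "sql-datatype": "character varying"}},
    {"breadcrumb": ["properties", "invalid_xml"],
     "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}},
]
# With entries like these, the same comprehension the test uses yields {"id"}:
assert {item["breadcrumb"][1] for item in example_stream_metadata[1:]
        if item["metadata"]["inclusion"] == "automatic"} == {"id"}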
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
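# Illustrative sketch only (not part of the test suite): the recursive
# sdc_recursive_*_array definitions in expected_schemas above describe Postgres
# arrays of any nesting depth, e.g. our_int_array INTEGER[][]. This standalone
# check assumes the third-party jsonschema package, which the tests themselves
# do not use.
import jsonschema

nested_int_array_schema = {
    "definitions": {
        "sdc_recursive_integer_array": {
            "type": ["null", "integer", "array"],
            "items": {"$ref": "#/definitions/sdc_recursive_integer_array"},
        }
    },
    "type": "object",
    "properties": {
        "our_int_array": {
            "type": ["null", "array"],
            "items": {"$ref": "#/definitions/sdc_recursive_integer_array"},
        }
    },
}

# A two-dimensional INTEGER[][] value replicates as a list of lists of integers;
# each nesting level re-uses the same recursive definition, so both calls pass.
jsonschema.validate({"our_int_array": [[1, 2, 3], [4, 5, 6]]}, nested_int_array_schema)
jsonschema.validate({"our_int_array": None}, nested_int_array_schema)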
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after various manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the third sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
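For reference, the replication-key value bookmarked above is just the UTC ISO-8601 rendering that the expected_ts_tz helper produces for the third seeded record. The following is a minimal standalone sketch, not part of the test suite, that reproduces that conversion with the same pytz calls the test uses:

import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')

# replication-key value of the third seeded record (see record 3 above)
our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
our_ts_tz = nyc_tz.localize(our_ts)

# same rule as the expected_ts_tz helper: convert to UTC, render ISO-8601
bookmark_value = datetime.datetime.strftime(our_ts_tz.astimezone(pytz.utc),
                                            "%Y-%m-%dT%H:%M:%S.%f+00:00")

assert bookmark_value == '1997-02-02T07:02:02.722184+00:00'
print(bookmark_value)

Because early February falls outside daylight saving time, America/New_York is at -05:00 here, which is why the asserted timestamps land five hours later in UTC.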
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a higher replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #---------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations (the assertions continue after the sketch below)... 
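The behaviour exercised by syncs 2 and 3 (the previously bookmarked record is emitted again, rows below the bookmark are skipped, and results arrive ordered by the replication key) is what an inclusive, ordered bookmark query would yield. The sketch below illustrates that idea with psycopg2 against the table from this test; the query text is an assumption made for clarity, not the tap's actual implementation:

import psycopg2.extras

def fetch_at_or_above_bookmark(conn, bookmark_value):
    # Illustrative only: emulate an inclusive (">=") replication-key scan,
    # ordered ascending, over the table used by this test.
    with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        cur.execute(
            'SELECT * '
            'FROM "public"."postgres_incremental_replication_test" '
            'WHERE "OUR TS TZ" >= %s '
            'ORDER BY "OUR TS TZ" ASC',
            (bookmark_value,))
        return cur.fetchall()

# e.g. calling this with a connection from db_utils.get_test_connection('dev')
# and the bookmark left by sync 2 would return only the 2111-01-01 record,
# which matches the single upsert asserted for sync 3 below.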
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that chicken's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
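[Editor's note] The same four bookmark checks (currently_syncing is None, the lsn is present, the lsn never moves backwards, and the table version is unchanged) are repeated after every sync in this test. A minimal sketch of a helper that could consolidate them is below; the name assert_lsn_bookmark and the prev_lsn parameter are illustrative additions, not part of the existing test suite.

    def assert_lsn_bookmark(self, state, stream_id, table_version, prev_lsn=None):
        # Sketch only: consolidates the bookmark assertions repeated after each sync.
        bookmark = state['bookmarks'][stream_id]
        self.assertIsNone(state['currently_syncing'],
                          msg="expected state's currently_syncing to be None")
        self.assertIsNotNone(bookmark['lsn'],
                             msg="expected bookmark for {} to have an lsn".format(stream_id))
        if prev_lsn is not None:
            # logical replication bookmarks should never move backwards
            self.assertGreaterEqual(bookmark['lsn'], prev_lsn)
        # table_version does NOT change across log-based syncs
        self.assertEqual(bookmark['version'], table_version)
        return bookmark['lsn']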
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
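[Editor's note] The UPDATE above sets our_decimal and our_double to 'NaN' and our_real to '+Infinity'; the expected record in the next block therefore carries None for those three columns, since strict JSON has no encoding for NaN or infinities. A minimal sketch of that mapping follows; the helper name nullify_special_floats is purely illustrative and only mirrors what the assertions below expect, not the tap's actual implementation.

    import math
    import decimal

    def nullify_special_floats(value):
        # Illustrative only: NaN / +Infinity / -Infinity values are expected
        # to surface as None in the emitted record.
        if isinstance(value, (float, decimal.Decimal)) and (math.isnan(value) or math.isinf(value)):
            return None
        return value

    # nullify_special_floats(decimal.Decimal('NaN'))   -> None
    # nullify_special_floats(float('+inf'))            -> None
    # nullify_special_floats(decimal.Decimal('56.81')) -> Decimal('56.81')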
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
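[Editor's note] A full-table view sync is expected to bracket its upserts with activate_version messages, which is what the three message assertions above verify one index at a time. A compact, hypothetical way to express the same check over the whole message list is sketched below; actions_of is an illustrative helper and not part of tap-tester.

    def actions_of(records_by_stream, stream):
        # Illustrative helper: the ordered list of message actions for a stream.
        return [m['action'] for m in records_by_stream[stream]['messages']]

    # For a single-record full-table sync this is equivalent to the assertions above:
    # self.assertEqual(actions_of(records_by_stream, 'chicken_view'),
    #                  ['activate_version', 'upsert', 'activate_version'])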
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
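# NB | Editor's sketch of the value-by-value pattern described above (an assumption,
#      not part of the tap-tester suite; `expected` and `actual` stand in for any two
#      record dicts being compared):
#
#          for key in expected:
#              with self.subTest(field=key):
#                  self.assertEqual(expected[key], actual.get(key, "MISSING FIELD"))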
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without error exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the <database>-<schema>-<table> format for each stream. - - Verify the catalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available.
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
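# NB | Editor's sketch of the stronger assertion a fixed tap would satisfy, using the
#      values collected above (an assumption; in this suite expected_replication_keys
#      is empty, so only primary keys are asserted below):
#
#          self.assertSetEqual(expected_primary_keys | expected_replication_keys,
#                              actual_automatic_fields)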
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
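The replication_key_value asserted just above is simply the UTC-normalized string produced by the expected_ts_tz helper defined earlier in this test class. A minimal standalone sketch of that conversion, assuming the same pytz-based formatting the helper uses (illustrative values only, not part of the patch):

    import datetime
    import pytz

    # Assumption: mirrors the expected_ts_tz helper above -- localize a naive
    # timestamp to America/New_York, then render it in UTC with a literal
    # "+00:00" suffix, the form the bookmark and record assertions compare against.
    nyc_tz = pytz.timezone('America/New_York')
    our_ts_tz = nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))

    expected = our_ts_tz.astimezone(pytz.utc).strftime("%Y-%m-%dT%H:%M:%S.%f+00:00")
    print(expected)  # 1997-02-02T07:02:02.722184+00:00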
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
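For the checks that follow, it helps to keep the inclusive bookmark comparison in mind: the preceding syncs show that INCREMENTAL mode re-selects every remaining row whose replication-key value is at or above the saved replication_key_value, which is why only the previously bookmarked record (id 6) comes back once id 1 has been deleted. A rough sketch of that selection, using illustrative literals taken from the expected records above (not the tap's actual query):

    # Sketch only -- approximates the bookmark filter this sync exercises.
    bookmark = "2111-01-01T17:12:12.222111+00:00"   # replication_key_value carried over from the prior sync

    remaining_rows = [
        {"id": 3, "OUR TS TZ": "1997-02-02T07:02:02.722184+00:00"},   # below the bookmark
        {"id": 6, "OUR TS TZ": "2111-01-01T17:12:12.222111+00:00"},   # the bookmarked record
    ]
    resynced = [row for row in remaining_rows if row["OUR TS TZ"] >= bookmark]
    assert [row["id"] for row in resynced] == [6]   # only the bookmarked record is replicated again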
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
-        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
-        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
-
-        # verify state and bookmarks
-        state = menagerie.get_state(conn_id)
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-
-        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
-        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
-        lsn_cows_1 = bookmark_cows['lsn']
-        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
-
-        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
-        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
-        lsn_chickens_1 = bookmark_chickens['lsn']
-        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
-
-
-        #----------------------------------------------------------------------
-        # invoke the sync job again after adding records
-        #----------------------------------------------------------------------
-        print("inserting 2 more cows and 2 more chickens")
-
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor() as cur:
-                # insert another cow
-                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
-                insert_record(cur, test_table_name_cows, self.cows_rec_2)
-                # update that cow's expected values
-                self.cows_rec_2['id'] = 2
-                self.cows_rec_2['_sdc_deleted_at'] = None
-
-                # insert another chicken
-                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
-                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
-                # update that chicken's expected values
-                self.chicken_rec_2['id'] = 2
-                self.chicken_rec_2['_sdc_deleted_at'] = None
-
-                # and repeat...
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'}
-
-    @staticmethod
-    def expected_sync_streams():
-        return { 'postgres_logical_replication_test' }
-
-    @staticmethod
-    def expected_pks():
-        return {
-            'postgres_logical_replication_test' : {'id'}
-        }
-
-    @staticmethod
-    def tap_name():
-        return "tap-postgres"
-
-    @staticmethod
-    def name():
-        return "tap_tester_postgres_logical_replication"
-
-    @staticmethod
-    def get_type():
-        return "platform.postgres"
-
-    @staticmethod
-    def get_credentials():
-        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
-
-    @staticmethod
-    def get_properties():
-        return {'host' : os.getenv('TAP_POSTGRES_HOST'),
-                'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
-                'port' : os.getenv('TAP_POSTGRES_PORT'),
-                'user' : os.getenv('TAP_POSTGRES_USER'),
-                'default_replication_method' : 'LOG_BASED',
-                'logical_poll_total_seconds': '10',
-                'wal2json_message_format': '1'
-                }
-
-
-    def test_run(self):
-        conn_id = connections.ensure_connection(self)
-
-        # run in check mode
-        check_job_name = runner.run_check_mode(self, conn_id)
-
-        # verify check exit codes
-        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
-        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
-
-        # verify the tap discovered the right streams
-        found_catalogs = [fc for fc
-                          in menagerie.get_catalogs(conn_id)
-                          if fc['tap_stream_id'] in self.expected_check_streams()]
-
-
-        self.assertGreaterEqual(len(found_catalogs),
-                                1,
-                                msg="unable to locate schemas for connection {}".format(conn_id))
-
-        found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs))
-        diff = self.expected_check_streams().symmetric_difference(found_catalog_names)
-        self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
-
-        # verify that persisted streams have the correct properties
-        test_catalog = found_catalogs[0]
-
-        self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name'])
-
-        print("discovered streams are correct")
-
-        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}]
-        # don't select our_text_2
-        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog,
-                                                               menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']),
-                                                               additional_md,
-                                                               ['our_text_2'])
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-
-        # verify tap and target exit codes
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        record_count_by_stream = runner.examine_target_output_file(self,
-                                                                   conn_id,
-                                                                   self.expected_sync_streams(),
-                                                                   self.expected_pks())
-
-
-        self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4})
-        records_by_stream = runner.get_records_from_target_output()
-
-        table_version = records_by_stream['postgres_logical_replication_test']['table_version']
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'],
-                         'activate_version')
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'],
-                         'upsert')
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'],
-                         'upsert')
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'],
-                         'upsert')
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'],
-                         'upsert')
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'],
-                         'activate_version')
-
-        # verify state and bookmarks
-        state = menagerie.get_state(conn_id)
-
-
-        bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test']
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-
-        self.assertIsNotNone(bookmark['lsn'],
-                             msg="expected bookmark for stream to have an lsn")
-        lsn_1 = bookmark['lsn']
-
-        self.assertEqual(bookmark['version'], table_version,
-                         msg="expected bookmark for stream to match version")
-
-
-        #----------------------------------------------------------------------
-        # invoke the sync job again after adding a record
-        #----------------------------------------------------------------------
-        print("inserting record 5")
-
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor() as cur:
-                # insert fixture data 5
-                our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333)
-                nyc_tz = pytz.timezone('America/New_York')
-                our_ts_tz = nyc_tz.localize(our_ts)
-                our_time = datetime.time(3,4,5)
-                our_time_tz = our_time.isoformat() + "-04:00"
-                our_date = datetime.date(1933, 3, 3)
-                my_uuid = str(uuid.uuid1())
-
-                #STRINGS:
-                #OUR TS: '1993-03-03 03:03:03.333333'
-                #OUR TS TZ: '1993-03-03 08:03:03.333333+00'
-                #'OUR TIME': '03:04:05'
-                #'OUR TIME TZ': '03:04:05+00'
-                self.rec_5 = {'our_varchar' : "our_varchar 5", # str
-                              'our_varchar_10' : "varchar13", # str
-                              'our_text' : "some text 3", #str
-                              'our_text_2' : "NOT SELECTED",
-                              'our_integer' : 96000, #int
-                              'our_smallint' : 3, # int
-                              'our_bigint' : 3000000, #int
-                              'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a
-                              quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333'
-                              quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00'
-                              quote_ident('OUR TIME', cur) : our_time, # str '03:04:05'
-                              quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00'
-                              quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a
-                              'our_double' : 3.3, #3.3 / our_double is a
-                              'our_real' : 6.6, #6.6 / our_real is a
-                              'our_boolean' : True, #boolean
-                              'our_bit' : '1', #string
-                              'our_json' : json.dumps({'secret' : 33}), #string
-                              'our_jsonb' : json.dumps(['burgers make me hungry']),
-                              'our_uuid' : my_uuid, #string
-                              'our_store' : 'jumps=>"high",name=>"betty"', #string
-                              'our_citext': 'maGICKal 3',
-                              'our_cidr' : '192.168.102.128/32',
-                              'our_inet': '192.168.102.128/32',
-                              'our_mac' : '08:00:2b:01:02:05',
-                              'our_money': '$412.1234'
-                              }
-
-                insert_record(cur, test_table_name, self.rec_5)
-
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-
-        # verify tap and target exit codes
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        record_count_by_stream = runner.examine_target_output_file(self,
-                                                                   conn_id,
-                                                                   self.expected_sync_streams(),
-                                                                   self.expected_pks())
-
-        self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 })
-        records_by_stream = runner.get_records_from_target_output()
-
-        self.assertTrue(len(records_by_stream) > 0)
-
-        for stream, recs in records_by_stream.items():
-            # verify the persisted schema was correct
-            self.assertEqual(recs['schema'],
-                             expected_schemas[stream],
-                             msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))
-
-        self.assertEqual(1,
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discovered metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - 
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
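[Editor's note] The record asserted just below packs many type conversions into one dictionary. A few illustrative pairs, taken from this test's rec_1 and the expected record, show the pattern: Postgres array and hstore literals go in as strings and are expected back as nested Python/JSON values, with money rounded to cents.

    # Pairs taken from rec_1 above and expected_inserted_record below (illustration only).
    conversions = [
        # (column,            inserted literal,                   expected replicated value)
        ("our_bit_array",    "{{0,1,1}}",                         [[False, True, True]]),
        ("our_int_array",    "{{1,2,3},{4,5,6}}",                 [[1, 2, 3], [4, 5, 6]]),
        ("our_hstore_array", '{{"size=>small","name=>betty"}}',   [[{"size": "small"}, {"name": "betty"}]]),
        ("our_money_array",  "{{$412.1234}}",                     [["$412.12"]]),  # money rounded to 2 places
    ]
    for column, literal, replicated in conversions:
        print(column, literal, "->", replicated)
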
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
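[Editor's note] The expected_ts and expected_ts_tz helpers defined above encode the timestamp normalization the expected records rely on: timezone-aware values are shifted to UTC and rendered as ISO-8601 strings with a literal +00:00 suffix, while naive values are rendered as-is with the same suffix. A worked example using this test's record-1 values:

    import datetime
    import pytz

    nyc_tz = pytz.timezone('America/New_York')
    our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
    our_ts_tz = nyc_tz.localize(our_ts)

    # naive timestamp: formatted directly, "+00:00" appended
    assert datetime.datetime.strftime(
        our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") == '1997-02-02T02:02:02.722184+00:00'

    # tz-aware timestamp: converted to UTC first (New York is UTC-5 on Feb 2), then formatted
    assert datetime.datetime.strftime(
        our_ts_tz.astimezone(pytz.utc),
        "%Y-%m-%dT%H:%M:%S.%f+00:00") == '1997-02-02T07:02:02.722184+00:00'
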
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - -
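The assertions above pin down the incremental bookmark: the stream's table version, the replication key ('OUR TS TZ'), and the replication-key value of the last record synced. The sketch below is illustrative only and is not tap-postgres's implementation; the helper name and sample rows are made up for this example. It shows the inclusive ">= bookmark" selection rule the second and third syncs in this test rely on: the bookmarked record is re-emitted on the next run, while rows inserted or updated with lower replication-key values never appear again.

def rows_for_next_sync(rows, replication_key, bookmark_value):
    """Return the rows an INCREMENTAL sync would emit, in ascending key order."""
    due = [row for row in rows if row[replication_key] >= bookmark_value]
    return sorted(due, key=lambda row: row[replication_key])

# UTC ISO-8601 strings compare correctly as plain strings, matching the
# "OUR TS TZ" values the tap emits.
table = [
    {"id": 1, "OUR TS TZ": "2021-04-04T08:04:04.733184+00:00"},  # updated above the bookmark
    {"id": 2, "OUR TS TZ": "1990-04-04T08:04:04.733184+00:00"},  # updated below the bookmark
    {"id": 3, "OUR TS TZ": "1997-02-02T07:02:02.722184+00:00"},  # the bookmarked record itself
]
print(rows_for_next_sync(table, "OUR TS TZ", "1997-02-02T07:02:02.722184+00:00"))
# -> id 3 first, then id 1; id 2 drops out of all future incremental syncs

The comparison is typically inclusive so that rows sharing the bookmarked value but committed after the previous run are not skipped, which is why each sync re-emits at least the previously bookmarked record.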
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a higher replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #---------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations...
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
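#----------------------------------------------------------------------
# A minimal sketch (not part of the original patch): the bookmark checks
# repeated after every sync above could be factored into a helper like the
# hypothetical one below. The name and signature are illustrative only.
#----------------------------------------------------------------------
def assert_logical_bookmark(test, state, tap_stream_id, previous_lsn, expected_version):
    """Assert the LOG_BASED bookmark invariants for one stream and return its lsn."""
    test.assertIsNone(state['currently_syncing'],
                      msg="expected state's currently_syncing to be None")
    bookmark = state['bookmarks'][tap_stream_id]
    test.assertIsNotNone(bookmark['lsn'], msg="expected bookmark for stream to have an lsn")
    test.assertGreaterEqual(bookmark['lsn'], previous_lsn,
                            msg="expected the lsn bookmark to be non-decreasing across syncs")
    test.assertEqual(bookmark['version'], expected_version,
                     msg="expected the table_version bookmark to be unchanged")
    return bookmark['lsn']

# e.g. lsn_6 = assert_logical_bookmark(self, state,
#                  'dev-public-postgres_logical_replication_test', lsn_5, table_version)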
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
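#----------------------------------------------------------------------
# Illustrative sketch only (not part of the test harness): the
# 'wal2json_message_format' property above selects between wal2json's two
# output formats (the plugin's 'format-version' option). The snippet below
# shows how the 'stitch' slot could be read directly with psycopg2 to inspect
# format-version 2 payloads; the DSN and consumer are assumptions, and
# consume_stream() blocks until interrupted.
#----------------------------------------------------------------------
import psycopg2
import psycopg2.extras

def peek_wal2json_changes(dsn, format_version=2):
    conn = psycopg2.connect(dsn,
                            connection_factory=psycopg2.extras.LogicalReplicationConnection)
    cur = conn.cursor()
    cur.start_replication(slot_name='stitch', decode=True,
                          options={'format-version': str(format_version)})

    def consume(msg):
        print(msg.payload)                                  # one JSON change message
        msg.cursor.send_feedback(flush_lsn=msg.data_start)  # acknowledge what we've read

    cur.consume_stream(consume)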
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
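The delete assertions above depend on log-based replication surfacing a DELETE as an upsert message whose data carries _sdc_deleted_at. A minimal sketch (split_live_and_deleted is a hypothetical helper, not part of the suite) of how a consumer could separate live rows from soft deletes in that output:

    def split_live_and_deleted(messages):
        # Partition upsert messages by the _sdc_deleted_at marker the tap adds
        # under logical replication; non-upsert messages are ignored.
        live, deleted = [], []
        for message in messages:
            if message.get('action') != 'upsert':
                continue
            target = deleted if message['data'].get('_sdc_deleted_at') else live
            target.append(message['data'])
        return live, deleted

    live, deleted = split_live_and_deleted(
        [{'action': 'upsert',
          'data': {'id': 3, '_sdc_deleted_at': '2021-03-29T14:33:33+00:00'}}])
    assert live == [] and deleted[0]['id'] == 3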
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
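Both view tests select their stream by pushing top-level metadata that names the replication method and, because a view exposes no discovered primary key, an explicit 'view-key-properties' list. A small sketch of that shape (view_replication_metadata is a hypothetical helper mirroring the replication_md literals used in these tests):

    def view_replication_metadata(method, key_columns, replication_key=None):
        # Views have empty table-key-properties in discovery, so the key columns
        # must be supplied explicitly when the stream is selected.
        return [{'breadcrumb': [],
                 'metadata': {'replication-method': method,
                              'replication-key': replication_key,
                              'view-key-properties': key_columns}}]

    full_table_md = view_replication_metadata('FULL_TABLE', ['id'])
    incremental_md = view_replication_metadata('INCREMENTAL', ['id'], replication_key='updated_at')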
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
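The updated_at column created above becomes the incremental replication key later in this test, and the bookmark it produces is asserted as an ISO-8601 string. A minimal check of that formatting, assuming (for illustration only) that the session stores the naive fixture timestamp as UTC:

    import datetime

    # Naive fixture value treated as UTC here; the real test depends on the
    # database session timezone when the timestamptz column is populated.
    updated_at = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111,
                                   tzinfo=datetime.timezone.utc)
    assert updated_at.isoformat() == '2111-01-01T12:12:12.222111+00:00'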
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
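- # NOTE | Per the assertions above, messages[0] and messages[-1] are the activate_version
- #        messages, so messages[1] and messages[2] below are the upsert records for the
- #        minimum-value and maximum-value fixtures respectively.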
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
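- # For reference, the field-level stream_metadata entries parsed above (and asserted on
- # below) have roughly this shape (illustrative example only, not produced by this test):
- #   {'breadcrumb': ['properties', 'id'],
- #    'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer'}}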
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
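- # NOTE | expected_check_streams() returns tap_stream_ids in <db>-<schema>-<table> form,
- #        so the filter above keeps only this test's catalog entry.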
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after various manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the third sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'chicken_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'chicken_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that chicken's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
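# The same pattern repeats below for a third cow and chicken: insert the raw record, then
# extend it with the SERIAL-assigned 'id' and a null '_sdc_deleted_at' (the column the tap
# adds under LOG_BASED replication and only populates for deletes) so it can be compared
# against the upsert message later. A minimal sketch of that step; the helper name is
# illustrative only and is not used by this test:
def expected_upsert(inserted_record, serial_id):
    """Return the record shape the target should emit for a freshly inserted row."""
    return dict(inserted_record, id=serial_id, _sdc_deleted_at=None)
# e.g. expected_upsert({'cow_name': "cindy cow", 'cow_age': 10}, 3) matches self.cows_rec_3 below.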
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't select our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
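
The block above closes each sync by reading the tap state back and checking that the LSN bookmark never moves backwards while the table version stays fixed. The state object those assertions walk has roughly the following shape; this is a sketch inferred from the assertions, not output captured from a run, and the numeric values are placeholders.

    # Approximate shape of the state inspected above (values are illustrative only).
    state = {
        "currently_syncing": None,
        "bookmarks": {
            "dev-public-postgres_logical_replication_test": {
                "lsn": 27077712,           # must be >= the LSN saved by the previous sync
                "version": 1602000000000,  # table_version; unchanged by inserts, updates, deletes
            }
        },
    }
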
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
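
With log-based replication a hard DELETE does not simply vanish from the stream; the next section expects it to arrive as one more upsert message whose _sdc_deleted_at field is populated. A minimal sketch of the message shape those assertions rely on, with a placeholder timestamp (only its non-null-ness is checked, not its value):

    # Illustrative delete message as asserted in the next section.
    delete_message = {
        "action": "upsert",
        "data": {
            "id": 3,
            "_sdc_deleted_at": "2021-03-29T14:33:33+00:00",  # placeholder timestamp
        },
    }
    assert delete_message["action"] == "upsert"
    assert delete_message["data"].get("_sdc_deleted_at") is not None
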
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
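
The UPDATE issued above deliberately feeds edge-case values into the row: our_money gets a third decimal place and the numeric/float columns are set to NaN and +Infinity. The expected record that follows reflects two behaviours the test depends on: the money column keeps only two fractional digits on input, and values JSON cannot represent (NaN, Infinity) are expected to come through as null. A small illustration of the rounding half, assuming the default two-digit money locale:

    import decimal

    # '$56.811' is stored by the money column with two fractional digits,
    # which is why the expected record below carries '$56.81'.
    assert str(decimal.Decimal("56.811").quantize(decimal.Decimal("0.01"))) == "56.81"

    # NaN / +Infinity have no JSON representation, so our_decimal, our_real and
    # our_double are expected as None in the emitted record.
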
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
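
For reference, the single upsert that the full-table view sync checks above is simply the join of the two fixture rows inserted in setUp. A sketch of that relationship, assuming the SERIAL id starts at 1 in the freshly created table:

    # chicken_view joins the two fixture tables, so the emitted record carries
    # columns from both sides (values mirror self.rec_1 / self.rec_2 above).
    rec_1 = {"name": "fred", "size": "big"}   # receives id 1 from the SERIAL column
    rec_2 = {"fk_id": 1, "age": 99}
    expected_view_record = {"id": 1, **rec_1, **rec_2}
    # -> {'id': 1, 'name': 'fred', 'size': 'big', 'fk_id': 1, 'age': 99}
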
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
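[Editor's illustrative sketch, not part of the patch; names are made up.] The NUMERIC_PRECISION/NUMERIC_SCALE constants above drive the JSON-schema bounds in expected_schemas: with NUMERIC(12,2) the smallest representable step is 0.01 and the exclusive bound is 10**(12-2). A minimal standalone sketch of that derivation:

import decimal

NUMERIC_PRECISION = 12
NUMERIC_SCALE = 2

def numeric_schema_sketch(precision, scale):
    # Mirrors the shape of the 'our_decimal' schema entry: the step size comes
    # from the scale, the exclusive max/min from the digits left of the decimal point.
    return {
        'type': ['null', 'number'],
        'multipleOf': decimal.Decimal(str(10 ** (0 - scale))),   # 0.01 for scale=2
        'maximum': 10 ** (precision - scale),                    # 10**10 for NUMERIC(12,2)
        'exclusiveMaximum': True,
        'minimum': -10 ** (precision - scale),
        'exclusiveMinimum': True,
    }

assert numeric_schema_sketch(NUMERIC_PRECISION, NUMERIC_SCALE)['multipleOf'] == decimal.Decimal('0.01')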
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
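[Editor's illustrative sketch, not part of the patch; assumes pytz.] The expected_ts_tz helper above normalizes a zone-aware fixture timestamp to UTC before formatting, so the asserted string no longer depends on whether the localized value fell inside DST:

import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
local_ts = nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))  # early February: EST, UTC-5
utc_ts = local_ts.astimezone(pytz.utc)
expected = datetime.datetime.strftime(utc_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00")
assert expected == '1997-02-02T07:02:02.722184+00:00'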
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
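[Editor's illustrative sketch, not part of the patch; the field names below are stand-ins.] The subTest pattern referenced in the note above reports each mismatching key as its own failure and keeps iterating past the first one, instead of producing a single enormous dict diff when values are huge padded strings:

import unittest

class RecordComparisonSketch(unittest.TestCase):
    def test_record_fields(self):
        expected = {'id': 1, 'our_char_big': 'a' + ' ' * 9}   # stand-in for a hugely padded value
        actual = {'id': 1, 'our_char_big': 'a' + ' ' * 9}
        for key in expected:
            with self.subTest(field=key):
                # Each field gets its own pass/fail entry in the test report.
                self.assertEqual(expected[key], actual.get(key, "MISSING FIELD"))

if __name__ == '__main__':
    unittest.main()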
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
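[Editor's illustrative sketch, not part of the patch; the metadata list below is made up.] The automatic and unsupported field sets asserted above are recovered from the annotated-schema metadata by filtering per-property breadcrumbs on their "inclusion" value:

stream_metadata = [
    {"breadcrumb": [], "metadata": {"table-key-properties": ["id"], "row-count": 500}},
    {"breadcrumb": ["properties", "id"], "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
    {"breadcrumb": ["properties", "our_varchar"], "metadata": {"inclusion": "available", "sql-datatype": "character varying"}},
    {"breadcrumb": ["properties", "invalid_xml"], "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}},
]

# Skip the top-level (empty breadcrumb) entry, then bucket properties by inclusion.
automatic = {item["breadcrumb"][1] for item in stream_metadata
             if item["breadcrumb"] and item["metadata"].get("inclusion") == "automatic"}
unsupported = {item["breadcrumb"][1] for item in stream_metadata
               if item["breadcrumb"] and item["metadata"].get("inclusion") == "unsupported"}

assert automatic == {"id"}
assert unsupported == {"invalid_xml"}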
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
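The tap_stream_id values filtered on here are assumed to be assembled as <dbname>-<schema>-<table>, matching the literal in expected_check_streams() above; a quick sketch:

    # Assumed composition of tap_stream_id for this test's stream.
    db, schema, table = 'dev', 'public', 'postgres_full_table_replication_array_test'
    tap_stream_id = '{}-{}-{}'.format(db, schema, table)
    assert tap_stream_id == 'dev-public-postgres_full_table_replication_array_test'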
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
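As a worked example of the expected_ts / expected_ts_tz helpers defined above (using record 1's values), an aware timestamp is normalized to UTC before formatting while a naive one is formatted as-is:

    import datetime
    import pytz

    nyc_tz = pytz.timezone('America/New_York')
    our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
    our_ts_tz = nyc_tz.localize(our_ts)

    # naive timestamp: formatted directly, with a literal +00:00 suffix
    assert datetime.datetime.strftime(
        our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") == '1997-02-02T02:02:02.722184+00:00'

    # aware timestamp: converted to UTC first (America/New_York is UTC-5 in February)
    utc_ts = our_ts_tz.astimezone(pytz.utc)
    assert datetime.datetime.strftime(
        utc_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") == '1997-02-02T07:02:02.722184+00:00'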
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
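Before those manipulations are applied below, a plain restatement of the message shapes the first two full-table syncs asserted (just the sequences checked above, not new tap behavior):

    # 'action' sequences asserted for the first two full-table syncs
    sync_1_actions = ['activate_version', 'upsert', 'upsert', 'upsert', 'activate_version']
    sync_2_actions = ['upsert', 'upsert', 'upsert', 'activate_version']
    # Sync 2 carries a new, larger table_version and activates it only after the
    # records, presumably so the prior version stays active while the re-sync runs.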
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
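The update and delete helpers used in this section come from db_utils; a minimal sketch of what they are assumed to execute (the real helpers may build their SQL differently):

    from psycopg2.extensions import quote_ident

    def update_record(cur, canon_table_name, record_pk, data):
        # assumed: quote each column, parameterize the values, match on the serial id
        set_sql = ", ".join("{} = %s".format(quote_ident(col, cur)) for col in data)
        sql = "UPDATE {} SET {} WHERE id = %s".format(canon_table_name, set_sql)
        cur.execute(sql, list(data.values()) + [record_pk])

    def delete_record(cur, canon_table_name, record_pk):
        cur.execute("DELETE FROM {} WHERE id = %s".format(canon_table_name), [record_pk])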
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
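One note on the fixture data above before the discovery checks: the three rows were written so that the replication key ("OUR TS TZ") strictly increases with id, which the ordering assertions later in this test rely on.

    # UTC-normalized "OUR TS TZ" values for the three fixture rows (ids 1-3),
    # derived from the timestamps inserted above via expected_ts_tz().
    replication_key_values = [
        '1977-03-03T08:03:03.733184+00:00',  # id 1
        '1987-02-02T07:02:02.722184+00:00',  # id 2
        '1997-02-02T07:02:02.722184+00:00',  # id 3
    ]
    assert replication_key_values == sorted(replication_key_values)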
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - -
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a higher replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #---------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations...
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that chicken's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat...
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
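The three delete variants exercised above (a plain "WHERE id = ...", an "id IN (SELECT ...)", and an explicit "id IN (4, 5)") are all verified the same way. A minimal sketch of that shared pattern, assuming, as the assertions above do, that wal2json deletes arrive as 'upsert' messages carrying a non-null _sdc_deleted_at and that the bookmarked LSN only moves forward; the helper names here are illustrative and not part of the test module:

    def assert_delete_message(message, expected_id):
        # a logical-replication delete surfaces as an 'upsert' whose payload carries
        # a non-null _sdc_deleted_at plus the primary key of the deleted row
        assert message['action'] == 'upsert'
        assert message['data'].get('_sdc_deleted_at') is not None
        assert message['data']['id'] == expected_id

    def assert_lsn_advanced(previous_lsn, current_lsn):
        # bookmarks are compared numerically; the replication slot never rewinds
        assert current_lsn >= previous_lsn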
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
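Each sync step above ends with the same field-by-field record check. A small sketch of that comparison idiom, using hypothetical helper names that are not part of the test module: assert the key sets match first, then compare values one key at a time so a failure message names the offending column.

    def assert_record_matches(actual, expected):
        # the symmetric difference surfaces missing and unexpected columns in one message
        mismatched_keys = set(actual) ^ set(expected)
        assert not mismatched_keys, "keys are wrong: {}".format(mismatched_keys)
        for key, value in actual.items():
            assert value == expected[key], "{} != {} for key {}".format(value, expected[key], key)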
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
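The update statement above writes 'NaN' and '+Infinity' into the numeric columns and '$56.811' into our_money, and the expected record below carries None for the non-finite values and '$56.81' for the money column. A short sketch of the presumed reason, which is my assumption rather than code taken from the tap: JSON has no encoding for NaN or Infinity, so non-finite numbers are nulled before the record is emitted, while Postgres itself renders money rounded to whole cents.

    import math

    def jsonable_number(value):
        # non-finite floats (NaN, +/-Infinity) cannot be serialized as JSON numbers,
        # so they are dropped to None before the record is emitted (assumed behaviour)
        if isinstance(value, float) and not math.isfinite(value):
            return None
        return value

    assert jsonable_number(float('nan')) is None
    assert jsonable_number(float('+inf')) is None
    assert jsonable_number(56.81) == 56.81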
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
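A self-contained sketch of the message ordering asserted above for a full-table sync of the view (the helper name assert_full_table_message_order and the literal messages are illustrative): the upserts are bracketed by activate_version messages, and the bookmark keeps only the table version.

def assert_full_table_message_order(messages):
    # full-table replication: version activation brackets the upserts
    assert messages[0]['action'] == 'activate_version'
    assert messages[-1]['action'] == 'activate_version'
    assert all(m['action'] == 'upsert' for m in messages[1:-1])

assert_full_table_message_order([
    {'action': 'activate_version'},
    {'action': 'upsert', 'data': {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size': 'big'}},
    {'action': 'activate_version'},
])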
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
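For illustration, a standalone sketch of the SQL that the insert_record helper above builds; the table name is hard-coded as a quoted literal here, whereas the real helper quotes it with psycopg2's quote_ident and binds the values as parameters.

import datetime

data = {'name': 'fred', 'size': 'big',
        'updated_at': datetime.datetime(2111, 1, 1, 12, 12, 12, 222111)}
keys = sorted(data)                          # column order is the sorted key order
columns_sql = ", \n ".join(keys)
value_sql = ",".join(["%s"] * len(keys))
insert_sql = """ INSERT INTO {}
                        ( {} )
                 VALUES ( {} )""".format('"postgres_views_full_table_replication_test"',
                                         columns_sql, value_sql)
# cursor.execute(insert_sql, [data[key] for key in keys]) lets psycopg2 adapt and escape the values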
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
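As a standalone illustration of the our_decimal constraints in expected_schemas above, this is how a NUMERIC(12,2) column maps onto the JSON-schema bounds, using the NUMERIC_PRECISION and NUMERIC_SCALE values defined earlier in this file.

import decimal

NUMERIC_PRECISION, NUMERIC_SCALE = 12, 2
multiple_of = decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE)))    # Decimal('0.01'), the scale step
exclusive_maximum = 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE)    # 10**10, with exclusiveMaximum True
exclusive_minimum = -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE)   # symmetric exclusive lower bound
assert multiple_of == decimal.Decimal('0.01')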
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
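The note above motivates comparing records field by field rather than with one assertDictEqual. As a minimal, hedged sketch of that per-field subTest pattern (the `expected`/`actual` dicts here are hypothetical stand-ins; the real test pulls them from self.expected_records and the target output messages, as the assertions that follow in this hunk show):

import unittest

class ExampleFieldByField(unittest.TestCase):
    def test_record_matches(self):
        # Hypothetical records; real values include multi-megabyte CHAR fields,
        # which is why a single dict-level diff would be unreadable.
        expected = {'id': 1, 'our_char_big': 'a' + ' ' * 9}
        actual = {'id': 1, 'our_char_big': 'a' + ' ' * 9}
        for field, expected_value in expected.items():
            with self.subTest(field=field):
                # One failure report per field instead of one huge diff.
                self.assertEqual(expected_value, actual.get(field, "MISSING FIELD"))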
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
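As a hedged sketch of the annotated-schema metadata shape these assertions walk (the entries below are illustrative samples, not taken from a real catalog; the actual list comes from menagerie.get_annotated_schema(conn_id, catalog['stream_id'])["metadata"]):

# Illustrative stream metadata for the discovery table created above.
stream_metadata = [
    {"breadcrumb": [], "metadata": {"table-key-properties": ["id"], "row-count": 500,
                                    "schema-name": "public", "database-name": "discovery1",
                                    "is-view": False}},
    {"breadcrumb": ["properties", "id"],
     "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
    {"breadcrumb": ["properties", "our_varchar"],
     "metadata": {"inclusion": "available", "sql-datatype": "character varying"}},
    {"breadcrumb": ["properties", "invalid_xml"],
     "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}},
]

# Same bucketing the test performs with its comprehensions.
automatic = {item["breadcrumb"][1] for item in stream_metadata
             if item["breadcrumb"] and item["metadata"].get("inclusion") == "automatic"}
unsupported = {item["breadcrumb"][1] for item in stream_metadata
               if item["breadcrumb"] and item["metadata"].get("inclusion") == "unsupported"}

assert automatic == {"id"}             # only the primary key is automatic here
assert unsupported == {"invalid_xml"}  # unsupported datatypes are flagged, not dropped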
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
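Aside on the expected_ts_tz/expected_ts helpers defined above: they normalize fixture timestamps to the UTC-suffixed strings the target emits. A small worked example, using the record-1 fixture values from this file:

import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
our_ts_tz = nyc_tz.localize(our_ts)  # 1997-02-02 02:02:02.722184-05:00 (EST)

# Naive timestamps keep their wall-clock value and are simply suffixed with +00:00.
assert datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") == \
    '1997-02-02T02:02:02.722184+00:00'

# Timezone-aware timestamps are shifted to UTC first (EST is UTC-5 in February).
assert datetime.datetime.strftime(our_ts_tz.astimezone(pytz.utc),
                                  "%Y-%m-%dT%H:%M:%S.%f+00:00") == \
    '1997-02-02T07:02:02.722184+00:00'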
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
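A brief aside on the ordering assertions above (not part of the patch): the replication-key values are compared as strings, which is valid because the "%Y-%m-%dT%H:%M:%S.%f+00:00" format produced by expected_ts_tz is fixed-width and zero-padded with a constant offset, so lexicographic order of the strings matches chronological order of the underlying timestamps. A minimal sketch, using two timestamps purely for illustration:

    import datetime

    fmt = "%Y-%m-%dT%H:%M:%S.%f+00:00"
    earlier = datetime.datetime(1987, 2, 2, 7, 2, 2, 722184)
    later = datetime.datetime(1997, 2, 2, 7, 2, 2, 722184)

    # Fixed-width, zero-padded fields mean string order matches chronological
    # order, which is what the assertLess calls on replication-key values rely on.
    assert earlier.strftime(fmt) < later.strftime(fmt)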
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'chicken_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'chicken_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that chicken's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
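# The expected record above shows the handful of reshapings applied between
# the raw INSERT values and the target output: the hstore literal becomes a
# dict, BIT(1) '1' becomes True, money '$412.1234' is rounded to '$412.12',
# and the /32 inet loses its prefix. A rough sketch of the hstore conversion,
# assuming simple key=>"value" pairs with no embedded commas (enough for the
# fixtures in this test, not a general hstore parser):
def hstore_literal_to_dict(literal):
    result = {}
    for pair in literal.split(','):
        key, _, value = pair.partition('=>')
        result[key.strip().strip('"')] = value.strip().strip('"')
    return result

# hstore_literal_to_dict('jumps=>"high",name=>"betty"')
# -> {'jumps': 'high', 'name': 'betty'}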
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
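# The UPDATE above deliberately writes the special float values NaN and
# +Infinity into our_decimal, our_real and our_double. JSON has no literal for
# those, so the replicated record is expected to carry None for all three
# columns (see expected_updated_rec below). A minimal sketch of that mapping,
# inferred from the expected record rather than taken from the tap itself
# (assumes value is a number or None):
import math

def json_safe_number(value):
    # NaN and +/-Infinity cannot be represented as JSON numbers; drop them
    if value is None or math.isnan(value) or math.isinf(value):
        return None
    return value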
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
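# The single upsert verified above is just the join of the two fixture rows:
# table 1 gets id=1 from its SERIAL column, table 2 points back at it through
# fk_id, and the view exposes every column of both. The same shape in plain
# Python (illustrative only):
row_1 = {'id': 1, 'name': 'fred', 'size': 'big'}   # SERIAL assigns id=1
row_2 = {'fk_id': 1, 'age': 99}                    # references row_1 via fk_id
assert {**row_1, **row_2} == {'id': 1, 'fk_id': 1, 'name': 'fred',
                              'age': 99, 'size': 'big'}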
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
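# updated_at on the first table is what this test later selects as the
# INCREMENTAL replication key (see replication_md in test_run below), so the
# bookmark written after the sync is expected to hold its value, roughly:
#   state['bookmarks']['postgres-public-chicken_view'] contains
#       'replication_key': 'updated_at'
#       'replication_key_value': '2111-01-01T12:12:12.222111+00:00'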
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
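# A worked illustration of the our_decimal entry in expected_schemas above
# (a sketch; the values follow from NUMERIC_PRECISION=12 and NUMERIC_SCALE=2
# defined in this module). A NUMERIC(p, s) column is expected to be described as:
#   multipleOf = 10 ** -s           -> decimal.Decimal('0.01')
#   maximum    = 10 ** (p - s)      -> 10000000000, with exclusiveMaximum: True
#   minimum    = -10 ** (p - s)     -> -10000000000, with exclusiveMinimum: True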
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
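# How these expected values are normalized (illustrative, using the record-1
# fixture above): expected_ts() / expected_ts_tz(), defined further down in this
# class, render timestamps as UTC ISO-8601 strings, e.g.
#   1997-02-02 02:02:02.722184 America/New_York -> '1997-02-02T07:02:02.722184+00:00'
# and the money column comes back with a currency symbol and two decimal places,
# so the inserted '100.1122' is expected as '$100.11' below.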
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
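# Note on the TIME maximum above: PostgreSQL itself accepts '24:00:00' as a TIME
# value, but per the TODO above it appears to round-trip as '00:00:00' and lose
# its end-of-day meaning, so '23:59:59.999999' is used here as the largest value
# that survives the sync unchanged.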
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
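# For context, with the two fixture records above a FULL_TABLE sync is expected
# to emit record-count + 2 messages, in the order the assertions above pin down
# (data payloads abbreviated for illustration):
#   [{'action': 'activate_version'},
#    {'action': 'upsert', 'data': {...minimum-value record...}},
#    {'action': 'upsert', 'data': {...maximum-value record...}},
#    {'action': 'activate_version'}]
# messages[1] and messages[2] are therefore compared field by field below.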
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
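# Shape of the stream_metadata entries parsed above (illustrative values only;
# the real discovery output for this table reflects the 500 rows and the columns
# created in setUp):
#   {'breadcrumb': [],
#    'metadata': {'table-key-properties': ['id'], 'row-count': 500,
#                 'schema-name': 'public', 'database-name': 'discovery1',
#                 'is-view': False}}
#   {'breadcrumb': ['properties', 'id'],
#    'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer',
#                 'selected-by-default': True}}
#   {'breadcrumb': ['properties', 'our_varchar'],
#    'metadata': {'inclusion': 'available', 'sql-datatype': 'character varying',
#                 'selected-by-default': True}}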
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after various manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the third sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
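# The stream ids checked in the discovery assertions that follow use the
# "<dbname>-<schema>-<table>" naming seen throughout these tests. A minimal
# sketch of that convention, assuming the 'dev' database this test targets
# (illustrative only):
#
#     tap_stream_id = "{}-{}-{}".format('dev', test_schema_name, test_table_name)
#     assert tap_stream_id == 'dev-public-postgres_incremental_replication_test'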
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
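# The replication-key ordering and bookmark assertions above compare "OUR TS TZ"
# values as plain strings. That works because expected_ts_tz() renders every value
# as a fixed-width UTC ISO-8601 string, so lexicographic order matches
# chronological order. A minimal sketch of that property, reusing the UTC values
# expected for records 2 and 3 (illustrative only):
#
#     fmt = "%Y-%m-%dT%H:%M:%S.%f+00:00"
#     earlier = datetime.datetime(1987, 2, 2, 7, 2, 2, 722184).strftime(fmt)
#     later = datetime.datetime(1997, 2, 2, 7, 2, 2, 722184).strftime(fmt)
#     assert earlier < later  # string comparison agrees with time order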
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
-
-        # verify the first record was the bookmarked record from the previous sync
-        self.assertDictEqual(self.expected_records[2], messages[1]['data'])
-
-        # verify the expected updated record with a higher replication-key value was replicated
-        self.assertDictEqual(self.expected_records[0], messages[2]['data'])
-
-        # verify the expected inserted record with a lower replication-key value was NOT replicated
-        actual_record_ids = [message['data']['id'] for message in messages[1:]]
-        expected_record_id = self.expected_records[3]['id']
-        self.assertNotIn(expected_record_id, actual_record_ids)
-
-        # verify the deleted record with a higher replication-key value was NOT replicated
-        expected_record_id = self.expected_records[4]['id']
-        self.assertNotIn(expected_record_id, actual_record_ids)
-
-        # verify the expected updated record with a lower replication-key value was NOT replicated
-        expected_record_id = self.expected_records[1]['id']
-        self.assertNotIn(expected_record_id, actual_record_ids)
-
-        # verify the expected inserted record with a higher replication-key value was replicated
-        self.assertDictEqual(self.expected_records[5], messages[3]['data'])
-
-        # verify records are in ascending order by replication-key value
-        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
-        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
-
-        print("records are correct")
-
-        # get bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
-
-        # verify the bookmarked state matches our expectations
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertEqual(bookmark['version'], table_version)
-        self.assertEqual(bookmark['replication_key'], expected_replication_key)
-        self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key])
-
-        #----------------------------------------------------------------------
-        # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
-        #----------------------------------------------------------------------
-
-        # Delete a pre-existing record from the database
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-
-                # delete a record with a lower replication key than the previous sync
-                record_pk = 1
-                db_utils.delete_record(cur, canon_table_name, record_pk)
-
-        # run sync job 3 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id,self.expected_sync_streams(), self.expected_primary_keys()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(1, record_count_by_stream[test_table_name])
-
-        # verify messages match our expectations
-        self.assertEqual(2, len(messages))
-        self.assertEqual(messages[0]['action'], 'activate_version')
-        self.assertEqual(messages[1]['action'], 'upsert')
-        self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version)
-
-        # verify replicated records meet our expectations...
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
-        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
-        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
-
-        # verify state and bookmarks
-        state = menagerie.get_state(conn_id)
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-
-        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
-        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
-        lsn_cows_1 = bookmark_cows['lsn']
-        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
-
-        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
-        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
-        lsn_chickens_1 = bookmark_chickens['lsn']
-        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
-
-
-        #----------------------------------------------------------------------
-        # invoke the sync job again after adding records
-        #----------------------------------------------------------------------
-        print("inserting 2 more cows and 2 more chickens")
-
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor() as cur:
-                # insert another cow
-                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
-                insert_record(cur, test_table_name_cows, self.cows_rec_2)
-                # update that cow's expected values
-                self.cows_rec_2['id'] = 2
-                self.cows_rec_2['_sdc_deleted_at'] = None
-
-                # insert another chicken
-                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
-                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
-                # update that chicken's expected values
-                self.chicken_rec_2['id'] = 2
-                self.chicken_rec_2['_sdc_deleted_at'] = None
-
-                # and repeat...
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
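# Editor's note: illustrative sketch, not part of this patch. Each step above repeats the
# same bookmark checks: the stream bookmark carries an 'lsn' that must never move backwards
# across syncs, while its 'version' stays pinned to the table_version captured on the
# initial sync. A hypothetical helper expressing that invariant over a series of bookmarks:
def assert_lsn_progression(bookmarks, expected_version):
    """bookmarks: stream bookmark dicts (with 'lsn' and 'version') in sync order."""
    previous_lsn = None
    for bookmark in bookmarks:
        assert bookmark['lsn'] is not None, "expected bookmark to have an lsn"
        if previous_lsn is not None:
            assert bookmark['lsn'] >= previous_lsn, "lsn must not move backwards"
        assert bookmark['version'] == expected_version, "table_version must not change"
        previous_lsn = bookmark['lsn']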
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
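# Editor's note: illustrative sketch, not part of this patch. The expected record above
# relies on how timestamps are normalized in the target output: "OUR TS TZ" (stored with a
# time zone) is converted to UTC, while the naive "OUR TS" is emitted as if it were already
# UTC. A small runnable check of the two strings asserted above:
import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333)
our_ts_tz = nyc_tz.localize(our_ts)   # EST on 1993-03-03, i.e. UTC-05:00
assert our_ts_tz.astimezone(pytz.utc).isoformat() == '1993-03-03T08:03:03.333333+00:00'
assert our_ts.isoformat() + '+00:00' == '1993-03-03T03:03:03.333333+00:00'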
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
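# Editor's note: illustrative sketch, not part of this patch. The UPDATE above feeds a few
# special values through logical replication; per the expected_updated_rec assertions that
# follow, they are expected to land in the target output as:
special_value_expectations = {
    'our_money':   ('$56.811', '$56.81'),  # money keeps two fractional digits, so the input is rounded
    'our_decimal': ('NaN', None),          # NaN numerics are emitted as null
    'our_real':    ('+Infinity', None),    # +/-Infinity likewise becomes null
    'our_double':  ('NaN', None),
}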
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
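The assertions above reduce to one expectation for a full-table sync of the view: the upsert is bracketed by activate_version messages. A minimal sketch of that shape, reusing the records_by_stream value produced by runner.get_records_from_target_output() in the test:

# Sketch: a full-table sync emits activate_version, then the upserts, then activate_version.
messages = records_by_stream['chicken_view']['messages']
assert [m['action'] for m in messages] == ['activate_version', 'upsert', 'activate_version']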
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
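A minimal, self-contained sketch of the field-by-field subTest comparison described in the comment above; the record values here are hypothetical stand-ins, not the actual fixtures used by this test:

    import unittest

    class FieldByFieldExample(unittest.TestCase):
        def test_compare_field_by_field(self):
            # hypothetical expected/actual records standing in for the huge fixtures
            expected = {'id': 1, 'our_text': 'x' * 10, 'our_bit': False}
            actual = {'id': 1, 'our_text': 'x' * 10, 'our_bit': False}
            for key in expected.keys():
                # each field gets its own subTest so a failure reports only that key,
                # instead of dumping both full records the way assertDictEqual would
                with self.subTest(field=key):
                    self.assertEqual(expected[key], actual.get(key, "MISSING FIELD"))

    if __name__ == '__main__':
        unittest.main()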
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
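A small illustration of the breadcrumb/inclusion filtering used by the assertions in this test; the metadata fragment below is made up for the example and is not real tap output:

    # hypothetical annotated-schema metadata, shaped like the entries filtered above
    stream_metadata = [
        {'breadcrumb': [], 'metadata': {'table-key-properties': ['id'], 'row-count': 500}},
        {'breadcrumb': ['properties', 'id'], 'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer'}},
        {'breadcrumb': ['properties', 'our_varchar'], 'metadata': {'inclusion': 'available', 'sql-datatype': 'character varying'}},
        {'breadcrumb': ['properties', 'invalid_xml'], 'metadata': {'inclusion': 'unsupported', 'sql-datatype': 'xml'}},
    ]

    # only per-property entries (non-empty breadcrumbs) are bucketed by inclusion
    automatic = {item['breadcrumb'][1] for item in stream_metadata
                 if item['breadcrumb'] and item['metadata'].get('inclusion') == 'automatic'}
    unsupported = {item['breadcrumb'][1] for item in stream_metadata
                   if item['breadcrumb'] and item['metadata'].get('inclusion') == 'unsupported'}

    assert automatic == {'id'}            # only the primary key is automatic here
    assert unsupported == {'invalid_xml'}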
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
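For reference, the tap_stream_id values matched against expected_check_streams follow a {database}-{schema}-{table} convention; a tiny sketch using the names from this test:

    # mirrors the naming used by expected_check_streams for this stream
    test_db = "dev"
    test_schema_name = "public"
    table = "postgres_full_table_replication_array_test"

    tap_stream_id = "{}-{}-{}".format(test_db, test_schema_name, table)
    assert tap_stream_id == "dev-public-postgres_full_table_replication_array_test"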
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
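- # NB | Illustrative only (not part of the test run): how the expected_ts_tz
- #      helper defined above normalizes a localized timestamp into the UTC
- #      string the target is expected to emit. The sample value is taken from
- #      the fixtures in this patch; April 4 1996 predates that year's DST
- #      switch, so the New York offset is -05:00.
- #
- #        nyc_tz = pytz.timezone('America/New_York')
- #        sample = nyc_tz.localize(datetime.datetime(1996, 4, 4, 4, 4, 4, 733184))
- #        sample.astimezone(pytz.utc).strftime("%Y-%m-%dT%H:%M:%S.%f+00:00")
- #        # -> '1996-04-04T09:04:04.733184+00:00'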
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
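- # NB | For reference, the bookmark assertions above imply state of roughly
- #      this shape after the first INCREMENTAL sync (the version and key value
- #      are placeholders here; the real ones come from the run):
- #
- #        state = {
- #            'currently_syncing': None,
- #            'bookmarks': {
- #                'dev-public-postgres_incremental_replication_test': {
- #                    'version': <table_version emitted by this sync>,
- #                    'replication_key': 'OUR TS TZ',
- #                    'replication_key_value': <expected_records[2]['OUR TS TZ']>,
- #                }
- #            }
- #        }
- #
- #      No 'lsn' key is present, since log-based bookmarks do not apply here.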
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
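- # NB | Conceptually (this is not the tap's literal query, only the behavior
- #      the assertions below rely on), the incremental sync that follows
- #      selects rows as if by:
- #
- #        SELECT * FROM "postgres_incremental_replication_test"
- #        WHERE "OUR TS TZ" >= <previous replication_key_value>
- #        ORDER BY "OUR TS TZ" ASC
- #
- #      so the previously bookmarked row (id 3) is re-emitted, the update that
- #      moved id 1 above the bookmark and the newly inserted id 6 row are
- #      emitted, while the insert below the bookmark (id 4), the update that
- #      moved id 2 below the bookmark, and the deleted id 5 row are not
- #      replicated.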
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
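Aside, a sketch (not part of the patch): under LOG_BASED replication a deleted row still arrives as an 'upsert' message; what marks it as a delete is a non-null _sdc_deleted_at in the record data, alongside the primary key of the removed row. The delete phases below repeat that trio of assertions, which could be captured by a helper like this (the name is an assumption for the example):

def assert_delete_message(test, message, expected_id):
    """Illustrative only: a logical-replication delete event as these tests expect it."""
    test.assertEqual(message['action'], 'upsert')
    test.assertIsNotNone(message['data'].get('_sdc_deleted_at'))
    test.assertEqual(message['data']['id'], expected_id)

For instance, the single-row delete that follows would read assert_delete_message(self, records_by_stream['postgres_logical_replication_test']['messages'][0], 3).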
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
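Aside, a sketch (not part of the patch): views are discovered with an empty table-key-properties list, so each view test injects the key (and, for INCREMENTAL, the replication key) through stream metadata before selecting the catalog. The shape of that metadata entry is shown below; the helper name is an assumption made purely for illustration.

def build_view_replication_md(method, view_key_properties, replication_key=None):
    """Illustrative only: top-level metadata used to select a view-backed stream."""
    return [{
        "breadcrumb": [],
        "metadata": {
            "replication-method": method,                # FULL_TABLE, INCREMENTAL or LOG_BASED
            "replication-key": replication_key,          # e.g. 'updated_at' for INCREMENTAL
            "view-key-properties": view_key_properties,  # e.g. ["id"]
        },
    }]

build_view_replication_md('FULL_TABLE', ['id']) matches the entry used in the full-table view test above, and build_view_replication_md('INCREMENTAL', ['id'], 'updated_at') matches the incremental view test that follows.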
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
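The padded our_char_big expectation built in setUp relies on Postgres blank-padding CHAR(n) values to their declared length; the same idea in miniature, where n is the CHAR(10485760) width used above:

n = 10485760                                      # width of the our_char_big CHAR(n) column
inserted = "a"                                    # value written in record 1
expected = inserted + " " * (n - len(inserted))   # server-side blank padding to length n
assert len(expected) == n and expected.rstrip() == inserted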
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
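The comprehensions above walk the annotated-schema metadata, a flat list of breadcrumb/metadata pairs; a hand-rolled miniature of that shape (the entries are illustrative, chosen to match this table's id and our_varchar columns):

stream_metadata = [
    {"breadcrumb": [], "metadata": {"table-key-properties": ["id"],
                                    "schema-name": "public",
                                    "database-name": "discovery1",
                                    "row-count": 500,
                                    "is-view": False}},
    {"breadcrumb": ["properties", "id"],
     "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
    {"breadcrumb": ["properties", "our_varchar"],
     "metadata": {"inclusion": "available", "sql-datatype": "character varying"}},
]

top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []]
actual_automatic_fields = {item["breadcrumb"][1] for item in stream_metadata
                           if item["metadata"].get("inclusion") == "automatic"}

assert len(top_level_metadata) == 1
assert actual_automatic_fields == {"id"}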
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
-
-        # verify discovery produced (at least) 1 expected catalog
-        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
-                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
-        self.assertGreaterEqual(len(found_catalogs), 1)
-
-        # verify the tap discovered the expected streams
-        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
-        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
-        # verify that persisted streams have the correct properties
-        test_catalog = found_catalogs[0]
-        self.assertEqual(test_table_name, test_catalog['stream_name'])
-        print("discovered streams are correct")
-
-        # perform table selection
-        print('selecting {} and all fields within the table'.format(test_table_name))
-        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
-        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
-        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        # run sync job 1 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_1 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(3, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(5, len(messages))
-        self.assertEqual('activate_version', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('upsert', messages[3]['action'])
-        self.assertEqual('activate_version', messages[4]['action'])
-
-        # verify the persisted schema matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-        # verify replicated records match expectations
-        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
-        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
-        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
-
-        print("records are correct")
-
-        # grab bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
-
-        # verify state and bookmarks meet expectations
-        self.assertIsNone(state['currently_syncing'])
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertIsNone(bookmark.get('replication_key'))
-        self.assertIsNone(bookmark.get('replication_key_value'))
-        self.assertEqual(table_version_1, bookmark['version'])
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN and get the same 3 records
-        #----------------------------------------------------------------------
-
-        # run sync job 2 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id,
sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_2 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(3, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(4, len(messages))
-        self.assertEqual('upsert', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('activate_version', messages[3]['action'])
-
-        # verify the new table version increased on the second sync
-        self.assertGreater(table_version_2, table_version_1)
-
-        # verify the persisted schema still matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-        # verify replicated records still match expectations
-        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
-        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
-        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
-
-        # grab bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
-
-        # verify state and bookmarks meet expectations
-        self.assertIsNone(state['currently_syncing'])
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertIsNone(bookmark.get('replication_key'))
-        self.assertIsNone(bookmark.get('replication_key_value'))
-        self.assertEqual(table_version_2, bookmark['version'])
-
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN following various manipulations to the data
-        #----------------------------------------------------------------------
-
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-
-                # NB | We will perform the following actions prior to the next sync:
-                #      [Action (EXPECTED RESULT)]
-
-                #      Insert a record
-                #      Insert a record to be updated prior to sync
-                #      Insert a record to be deleted prior to sync (NOT REPLICATED)
-
-                #      Update an existing record
-                #      Update a newly inserted record
-
-                #      Delete an existing record
-                #      Delete a newly inserted record
-
-                # inserting...
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
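The assertions that follow spell out which of the six records the second sync should return. They all fall out of the same greater-than-or-equal bookmark filter: the previously bookmarked row is re-emitted, an update that moves a row above the bookmark is picked up, and inserts, updates, or deletes that stay below the bookmark are invisible. A rough restatement, with made-up ids and timestamps rather than the test's fixtures:

    # Hypothetical snapshot of the table after the manipulations above; values are stand-ins.
    previous_bookmark = "2000-01-01T00:00:00+00:00"
    table_after_changes = [
        {"id": 1, "OUR TS TZ": "2021-04-04T08:04:04+00:00"},  # updated above the bookmark -> emitted
        {"id": 2, "OUR TS TZ": "1990-04-04T08:04:04+00:00"},  # updated below the bookmark -> skipped
        {"id": 3, "OUR TS TZ": "2000-01-01T00:00:00+00:00"},  # the bookmarked row -> re-emitted
        {"id": 4, "OUR TS TZ": "1996-04-04T08:04:04+00:00"},  # inserted below the bookmark -> skipped
        {"id": 6, "OUR TS TZ": "2111-01-01T17:12:12+00:00"},  # inserted above the bookmark -> emitted
    ]                                                          # id 5 was deleted, so it never appears
    emitted = sorted((row for row in table_after_changes
                      if row["OUR TS TZ"] >= previous_bookmark),
                     key=lambda row: row["OUR TS TZ"])
    assert [row["id"] for row in emitted] == [3, 1, 6]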
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
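One more aside before the remaining checks: incremental replication has no tombstones, so a hard delete of a row sitting below the bookmark simply disappears from the source table and the sync cannot report it; all the third pass can do is re-emit the bookmarked row. (Delete visibility is what the LOG_BASED tests further down rely on via the _sdc_deleted_at column.) A minimal, hypothetical illustration with stand-in values:

    # Only the row at or above the bookmark comes back after the delete.
    previous_bookmark = "2111-01-01T17:12:12+00:00"
    table_after_delete = [
        {"id": 2, "OUR TS TZ": "1990-04-04T08:04:04+00:00"},
        {"id": 3, "OUR TS TZ": "2000-01-01T00:00:00+00:00"},
        {"id": 6, "OUR TS TZ": "2111-01-01T17:12:12+00:00"},
    ]                                   # the deleted row is gone and leaves no marker behind
    emitted = [row for row in table_after_delete if row["OUR TS TZ"] >= previous_bookmark]
    assert [row["id"] for row in emitted] == [6]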
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
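(The same pattern repeats below for a third cow and a third chicken.) The in-place bookkeeping here is worth a note: the expected upsert record is just the inserted record plus the id assigned by the SERIAL primary key and the _sdc_deleted_at column that LOG_BASED replication adds, which stays None until a delete event is seen. A hypothetical restatement of that pattern; the helper below is not part of the test:

    # Build the record the target is expected to receive for a freshly inserted row.
    def expected_upsert(inserted, assigned_id):
        expected = dict(inserted)
        expected["id"] = assigned_id          # generated by the SERIAL primary key
        expected["_sdc_deleted_at"] = None    # only populated when a delete is replicated
        return expected

    cows_rec_2 = {"cow_name": "betty cow", "cow_age": 21}
    assert expected_upsert(cows_rec_2, 2) == {
        "cow_name": "betty cow", "cow_age": 21, "id": 2, "_sdc_deleted_at": None}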
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
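
The delete-path checks above repeat a pattern used throughout these logical replication tests: a deleted row must come back as an 'upsert' message whose _sdc_deleted_at is set, the stream's lsn bookmark may only move forward across syncs, and the table version must not change. A minimal sketch of that pattern as a reusable helper follows; the helper and its argument shapes are illustrative assumptions, not part of this patch or of tap-postgres.

    # Illustrative only -- not part of the patch above. Assumes `test` is a
    # unittest.TestCase, `message` is a target message shaped like
    # {'action': 'upsert', 'data': {...}}, and `bookmark` is {'lsn': ..., 'version': ...}.
    def assert_deleted_row(test, message, expected_id):
        test.assertEqual(message['action'], 'upsert')
        test.assertIsNotNone(message['data'].get('_sdc_deleted_at'))
        test.assertEqual(message['data']['id'], expected_id)

    def assert_bookmark_advanced(test, bookmark, prev_lsn, table_version):
        test.assertIsNotNone(bookmark['lsn'])
        test.assertGreaterEqual(bookmark['lsn'], prev_lsn)    # lsn never moves backwards
        test.assertEqual(bookmark['version'], table_version)  # table_version does NOT change
        return bookmark['lsn']                                # carry into the next sync
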
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
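
This second copy of the logical replication test differs from the first mainly in its connection properties: wal2json_message_format is '2' instead of '1', which also changes how many records each incremental sync emits. Below is a hedged sketch of building those properties from a single helper rather than duplicating them per class; build_properties is a hypothetical name, and the keys mirror the get_properties methods above.

    import os

    # Hypothetical helper (not in the patch): one place to build the connection
    # properties that the two logical replication test classes otherwise duplicate.
    def build_properties(wal2json_message_format='1'):
        return {
            'host': os.getenv('TAP_POSTGRES_HOST'),
            'dbname': os.getenv('TAP_POSTGRES_DBNAME'),
            'port': os.getenv('TAP_POSTGRES_PORT'),
            'user': os.getenv('TAP_POSTGRES_USER'),
            'default_replication_method': 'LOG_BASED',
            'logical_poll_total_seconds': '10',
            'wal2json_message_format': wal2json_message_format,  # '1' or '2'
        }
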
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
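
The update above writes non-finite values ('NaN', '+Infinity') into NUMERIC, REAL and DOUBLE PRECISION columns plus an over-precise money literal; the expected records in these tests show the non-finite values coming back as None and the money value rounded to '$56.81'. The snippet below is only a standalone illustration of that normalization idea, under the assumption that non-finite numbers cannot be carried in JSON records; it is not how the tap itself implements it.

    import decimal

    # Standalone illustration (assumption, not tap-postgres code): non-finite
    # NUMERIC/REAL/DOUBLE values have no JSON representation, so the expected
    # records above show them as None after replication.
    def normalize_numeric(raw):
        value = decimal.Decimal(str(raw))
        if value.is_nan() or value.is_infinite():
            return None
        return value

    assert normalize_numeric('NaN') is None
    assert normalize_numeric('+Infinity') is None
    assert normalize_numeric('1.1') == decimal.Decimal('1.1')
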
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
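# (Editor's aside, not part of the patch.) The full-table view test above checks the
# activate_version / upsert / activate_version message sequence by indexing into
# `messages` directly. A minimal, reusable sketch of the same check, assuming only the
# `records_by_stream[stream]['messages']` layout these tests already use:
def assert_full_table_message_pattern(testcase, records_by_stream, stream):
    """Full-table syncs should open and close with activate_version and
    carry only upserts in between."""
    messages = records_by_stream[stream]['messages']
    testcase.assertGreaterEqual(len(messages), 3,
                                msg="expected two activate_version messages around the upserts")
    testcase.assertEqual('activate_version', messages[0]['action'])
    testcase.assertEqual('activate_version', messages[-1]['action'])
    for message in messages[1:-1]:
        testcase.assertEqual('upsert', message['action'])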
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
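# (Editor's aside, not part of the patch.) The updated_at column declared above becomes
# the view's replication key in this test; the fixture inserted below uses
# datetime.datetime(2111, 1, 1, 12, 12, 12, 222111), and the bookmark assertion at the
# end of test_run expects that value rendered as a UTC ISO-8601 string. A minimal sketch
# of the conversion, assuming the value round-trips as UTC (the same
# "%Y-%m-%dT%H:%M:%S.%f+00:00" format other tests in this series use for expected_ts):
_updated_at_sketch = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111)
_expected_bookmark_sketch = _updated_at_sketch.strftime("%Y-%m-%dT%H:%M:%S.%f+00:00")
assert _expected_bookmark_sketch == '2111-01-01T12:12:12.222111+00:00'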
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
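# (Editor's aside, not part of the patch.) db_utils.ensure_environment_variables_set()
# is a shared helper whose implementation is not shown here; judging from
# get_credentials() and get_properties() in this class, the suite reads at least
# TAP_POSTGRES_HOST, TAP_POSTGRES_DBNAME, TAP_POSTGRES_PORT, TAP_POSTGRES_USER and
# TAP_POSTGRES_PASSWORD from the environment. A minimal local guard with the same
# intent (variable names assumed from those methods, not from the helper itself):
_required_env = ("TAP_POSTGRES_HOST", "TAP_POSTGRES_DBNAME", "TAP_POSTGRES_PORT",
                 "TAP_POSTGRES_USER", "TAP_POSTGRES_PASSWORD")
_missing_env = [name for name in _required_env if os.getenv(name) is None]
if _missing_env:
    raise RuntimeError("missing environment variables: {}".format(_missing_env))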
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
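The NB above refers to unittest's subTest pattern; as a point of reference, a minimal standalone sketch of that value-by-value comparison follows (the `expected`/`actual` dicts here are hypothetical stand-ins for an expected record and the replicated record):

import unittest

class SubTestPatternExample(unittest.TestCase):
    # Minimal sketch of the per-field subTest comparison described in the NB comment above.

    def test_values_match(self):
        expected = {'id': 1, 'our_varchar': 'our_varchar'}  # hypothetical expected record
        actual = {'id': 1, 'our_varchar': 'our_varchar'}    # hypothetical replicated record
        for field, expected_value in expected.items():
            # each field gets its own subTest so one bad value does not mask the rest
            with self.subTest(field=field):
                self.assertEqual(expected_value, actual.get(field, "MISSING FIELD"))

if __name__ == '__main__':
    unittest.main()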
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
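The NB above is easier to follow with the metadata shape in hand; a minimal sketch, assuming `stream_metadata` is the list of Singer metadata entries returned for one stream (the sample entries and the `fields_by_inclusion` helper are illustrative only, not part of the test under review):

def fields_by_inclusion(metadata_entries, inclusion):
    # Collect field names whose column-level metadata carries the given inclusion value.
    return {
        entry["breadcrumb"][1]
        for entry in metadata_entries
        if entry.get("breadcrumb") and entry["metadata"].get("inclusion") == inclusion
    }

stream_metadata = [
    {"breadcrumb": [], "metadata": {"row-count": 500, "is-view": False}},
    {"breadcrumb": ["properties", "id"], "metadata": {"inclusion": "automatic"}},
    {"breadcrumb": ["properties", "our_varchar"], "metadata": {"inclusion": "available"}},
    {"breadcrumb": ["properties", "invalid_xml"], "metadata": {"inclusion": "unsupported"}},
]

automatic_fields = fields_by_inclusion(stream_metadata, "automatic")      # {'id'} -- only the PK
unsupported_fields = fields_by_inclusion(stream_metadata, "unsupported")  # {'invalid_xml'}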
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
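The expected_ts() and expected_ts_tz() helpers defined above compute each expectation from the localized fixture value instead of hard-coding a string, which keeps the expected offset correct whether a fixture date falls inside or outside daylight-saving time. A small worked example, reusing record 1's timestamp from the fixtures above (early March, when America/New_York is on EST, UTC-5):

    import datetime
    import pytz

    # Record 1's fixture timestamp, localized the same way setUp() does it.
    nyc_tz = pytz.timezone('America/New_York')
    ts_tz = nyc_tz.localize(datetime.datetime(1977, 3, 3, 3, 3, 3, 733184))

    # What expected_ts_tz() yields for it: the UTC equivalent, five hours later.
    print(ts_tz.astimezone(pytz.utc).strftime("%Y-%m-%dT%H:%M:%S.%f+00:00"))
    # -> 1977-03-03T08:03:03.733184+00:00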
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
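The bookmark verified above is what drives the next two syncs: rows are compared against the saved replication-key value inclusively, so the row that set the bookmark is emitted again on the following run. A minimal sketch of that selection rule, not the tap's actual implementation, just the behaviour these assertions rely on (select_incremental is a hypothetical helper name):

    # Minimal sketch of INCREMENTAL selection: keep rows whose replication-key
    # value is >= the bookmarked value (inclusive) and emit them in ascending
    # replication-key order. With ISO-8601 strings the comparison is lexicographic.
    def select_incremental(rows, replication_key, bookmark_value=None):
        chosen = [row for row in rows
                  if bookmark_value is None or row[replication_key] >= bookmark_value]
        return sorted(chosen, key=lambda row: row[replication_key])

The sync-2 and sync-3 assertions below follow directly from this rule.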
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
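Concretely, of the rows now in the table only those whose replication-key value compares greater than or equal to the sync-1 bookmark should be emitted, in ascending order. A short self-contained sketch using abbreviated, date-only replication-key values taken from the fixtures and updates above (id 5 has already been deleted):

    bookmark = "1997-02-02"   # sync-1 bookmark, i.e. record id 3's replication key
    rows = {1: "2021-04-04",  # updated above the bookmark
            2: "1990-04-04",  # updated below the bookmark
            3: "1997-02-02",
            4: "1996-04-04",  # inserted below the bookmark
            6: "2111-01-01"}  # inserted above the bookmark
    synced = sorted((rk, pk) for pk, rk in rows.items() if rk >= bookmark)
    print(synced)   # -> [('1997-02-02', 3), ('2021-04-04', 1), ('2111-01-01', 6)]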
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
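By the same inclusive rule, deleting pk 1 leaves only the row whose replication-key value equals the carried-over bookmark, so exactly one upsert is expected and the delete itself produces no message under INCREMENTAL replication. Using the same abbreviated, date-only values as above:

    bookmark = "2111-01-01"   # carried over from the previous sync (record id 6)
    remaining = {2: "1990-04-04", 3: "1997-02-02", 4: "1996-04-04", 6: "2111-01-01"}
    print([pk for pk, rk in remaining.items() if rk >= bookmark])   # -> [6]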
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
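The fixture inserts above all go through the insert_record helper defined earlier in this patch: it sorts the dict keys into a column list, quotes the table name with quote_ident, and binds the values as query parameters, which is also why fixtures with mixed-case columns pass quote_ident('OUR TS', cur) as the dict key. A standalone sketch of the same idea, with a placeholder DSN:

import psycopg2
from psycopg2.extensions import quote_ident

def insert_record(cursor, table_name, data):
    # deterministic column order, parameterized values; dict keys are used
    # verbatim as column names
    keys = sorted(data.keys())
    sql = "INSERT INTO {} ({}) VALUES ({})".format(
        quote_ident(table_name, cursor),
        ", ".join(keys),
        ", ".join(["%s"] * len(keys)))
    cursor.execute(sql, [data[k] for k in keys])

# hypothetical usage mirroring the fixtures above
conn = psycopg2.connect("dbname=dev user=postgres")  # placeholder DSN
conn.autocommit = True
with conn.cursor() as cur:
    insert_record(cur, "postgres_logical_replication_test_cows",
                  {"cow_name": "cindy cow", "cow_age": 10})

After each insert the test turns the same dict into the expected replicated record by adding the serial id and a null _sdc_deleted_at, which is what the upsert messages asserted further down carry.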
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
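Every sync in this test ends with the same three state checks: currently_syncing is None, the stream bookmark carries an LSN that never moves backwards, and the bookmark version still equals the table_version captured on the first sync. The sketch below shows a helper one could factor out for those checks; it assumes the tap-tester state layout used above and is not an existing utility in this suite:

def assert_logical_bookmark(test, state, stream_key, prev_lsn, expected_table_version):
    # shared shape of the post-sync checks in this test: nothing is mid-sync,
    # the stream bookmark has an LSN that only moves forward, and the
    # table_version recorded at the initial activate_version never changes
    test.assertIsNone(state['currently_syncing'])
    bookmark = state['bookmarks'][stream_key]
    test.assertIsNotNone(bookmark['lsn'])
    test.assertGreaterEqual(bookmark['lsn'], prev_lsn)
    test.assertEqual(bookmark['version'], expected_table_version)
    return bookmark['lsn']

# hypothetical usage inside test_run:
#   lsn_5 = assert_logical_bookmark(self, menagerie.get_state(conn_id),
#                                   'dev-public-postgres_logical_replication_test',
#                                   lsn_4, table_version)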
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
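# --- Editorial note (not part of the original patch) -------------------------
# The sync that follows verifies how a logical-replication DELETE surfaces:
# the tap is expected to emit an 'upsert' message whose data carries a
# populated '_sdc_deleted_at' timestamp rather than a separate delete action.
# The helper below is a hypothetical sketch of that check, shown only for
# illustration; it is not used anywhere in the original test.
def assert_logical_delete(test, message, expected_id):
    # deletes arrive as upserts flagged via the _sdc_deleted_at column
    test.assertEqual(message['action'], 'upsert')
    test.assertIsNotNone(message['data'].get('_sdc_deleted_at'))
    test.assertEqual(message['data']['id'], expected_id)
# ------------------------------------------------------------------------------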
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
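# --- Editorial note (not part of the original patch) -------------------------
# The UPDATE above writes non-finite numerics ('NaN', '+Infinity') and a
# sub-cent money value ('$56.811'). The expectations that follow assume the
# tap emits non-finite floats and decimals as null, and that Postgres's money
# type rounds the stored value to two decimal places, so it comes back as
# '$56.81'. A minimal sketch of that normalisation, for illustration only:
import math

def normalize_non_finite(value):
    # JSON cannot represent NaN or Infinity, so such values are expected as None
    if isinstance(value, float) and not math.isfinite(value):
        return None
    return value
# ------------------------------------------------------------------------------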
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
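# --- Editorial note (not part of the original patch) -------------------------
# The full-table view sync above frames its single upsert with
# activate_version messages, i.e. the expected action sequence is
# ['activate_version', 'upsert', 'activate_version']. The hypothetical helper
# below is an illustrative way to collect that sequence; the original test
# asserts each message index individually instead.
def message_actions(records_by_stream, stream):
    # collect the Singer message actions emitted for one stream, in order
    return [message['action'] for message in records_by_stream[stream]['messages']]
# ------------------------------------------------------------------------------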
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
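        # NB | unittest's subTest() records a separate failure per field and keeps
        #      iterating, so a single sync run reports every mismatched datatype
        #      instead of aborting on the first bad value. The "MISSING FIELD"
        #      default below turns an absent key into a visible assertion failure
        #      rather than a KeyError.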
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
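                # Illustrative (assumed) shape of the per-field metadata entries
                # collected above and asserted on below; actual values come from
                # discovery:
                #   {"breadcrumb": ["properties", "id"],
                #    "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}}
                #   {"breadcrumb": ["properties", "invalid_xml"],
                #    "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}}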
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
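        # NB | The dev database is shared by other test modules, so discovery may
        #      surface catalogs for unrelated tables; filtering on the expected
        #      tap_stream_ids keeps the assertions below scoped to this test's table.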
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- - # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after various manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the third sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
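# update_record and delete_record come from the suite's shared db_utils module,
# which is not part of this patch. A hedged sketch of what such helpers could
# look like, assuming the primary-key column is always named id (the real
# implementations may differ):
def update_record(cursor, canon_table_name, record_pk, data):
    """UPDATE one row by primary key, with one SET clause per key in data."""
    set_sql = ", ".join('"{}" = %s'.format(column) for column in data)
    cursor.execute("UPDATE {} SET {} WHERE id = %s".format(canon_table_name, set_sql),
                   list(data.values()) + [record_pk])

def delete_record(cursor, canon_table_name, record_pk):
    """DELETE one row by primary key."""
    cursor.execute("DELETE FROM {} WHERE id = %s".format(canon_table_name),
                   (record_pk,))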
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
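# For reference, the INCREMENTAL bookmark being asserted on above has roughly
# this shape; the values here are illustrative, not taken from a real run:
illustrative_state = {
    'currently_syncing': None,
    'bookmarks': {
        'dev-public-postgres_incremental_replication_test': {
            'version': 1617000000000,          # table_version from the first sync
            'replication_key': 'OUR TS TZ',    # the expected_replication_key column
            'replication_key_value': '2111-01-01T17:12:12.222111+00:00',
            # no 'lsn' key is present for INCREMENTAL streams
        }
    }
}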
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
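# The sdc_recursive_*_array definitions above are intentionally self-referential
# so that arbitrarily nested Postgres arrays validate against a single schema.
# A standalone illustration using the jsonschema package (an assumption here,
# not a dependency of this suite):
import jsonschema

recursive_int_schema = {
    'definitions': {
        'sdc_recursive_integer_array': {
            'type': ['null', 'integer', 'array'],
            'items': {'$ref': '#/definitions/sdc_recursive_integer_array'},
        }
    },
    'type': ['null', 'array'],
    'items': {'$ref': '#/definitions/sdc_recursive_integer_array'},
}

# A flat array and a two-dimensional array both satisfy the same definition.
jsonschema.validate([16, 32, 64], recursive_int_schema)
jsonschema.validate([[1, 2, 3], [4, 5, 6]], recursive_int_schema)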
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
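# self.rec_1 above supplies most columns as raw Postgres array literals
# ('{...}' strings), while the json/jsonb array columns are passed as lists of
# psycopg2.extras.Json values that the module-level insert_record helper renders
# with %s::json[] / %s::jsonb[] casts. A usage sketch, assuming an open cursor
# named cur on the 'dev' database and the array table created in setUp:
import psycopg2.extras

sample_row = {
    'our_int_array': '{{1,2,3},{4,5,6}}',                       # plain array literal
    'our_json_array': [psycopg2.extras.Json({'secret': 55})],   # sent as %s::json[]
    'our_jsonb_array': [psycopg2.extras.Json({'secret': 69})],  # sent as %s::jsonb[]
}
insert_record(cur, test_table_name, sample_row)
# Roughly generates:
#   INSERT INTO "postgres_logical_replication_array_test"
#   ( our_int_array, our_json_array, our_jsonb_array )
#   VALUES ( %s, %s::json[], %s::jsonb[] )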
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
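# Under LOG_BASED replication every upsert carries the metadata column
# _sdc_deleted_at (null for inserts and updates, populated when the row is
# deleted upstream) plus the SERIAL id assigned by Postgres, which is why the
# fixture dicts above are patched with 'id' and '_sdc_deleted_at' before being
# compared against the target output. Illustrative expected shape for the cow
# inserted above:
expected_cow_upsert = {
    'id': 2,                     # assigned by the SERIAL primary key
    'cow_name': 'betty cow',
    'cow_age': 21,
    '_sdc_deleted_at': None,     # only set when the row is deleted upstream
}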
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
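# A minimal sketch of the soft-delete assertion pattern repeated in the three
# delete scenarios above (hypothetical helper; only the message shape returned by
# runner.get_records_from_target_output is taken from this test -- the helper name
# and signature are illustrative, not part of tap-tester):
def assert_logical_delete(test, message, expected_id):
    """Every replicated DELETE should surface as an 'upsert' whose data carries
    a non-null _sdc_deleted_at plus the id of the deleted row."""
    test.assertEqual(message['action'], 'upsert')
    test.assertIsNotNone(message['data'].get('_sdc_deleted_at'))
    test.assertEqual(message['data']['id'], expected_id)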
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
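# The record inserted above writes our_money as '$412.1234' and our_inet as
# '192.168.102.128/32', while expected_inserted_record asserts '$412.12' and
# '192.168.102.128': the synced output renders money at two decimal places and
# drops the /32 host mask from inet values. A minimal illustrative sketch of those
# two expectations, assuming only what the assertions above state (the helper
# names below are not part of tap-postgres):
import decimal

def expected_money(raw):
    """Quantize a '$x.y' money literal to cents, keeping the dollar sign."""
    return '${}'.format(decimal.Decimal(raw.lstrip('$')).quantize(decimal.Decimal('0.01')))

def expected_inet(raw):
    """Drop the prefix length from a single-host (/32) inet literal."""
    return raw[:-3] if raw.endswith('/32') else raw

assert expected_money('$412.1234') == '$412.12'
assert expected_inet('192.168.102.128/32') == '192.168.102.128'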
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
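
Discovery reports the view with 'table-key-properties': [], since a view has no primary key of its own; the key has to be supplied through the selection metadata instead. A minimal sketch of that per-stream override, mirroring the shapes used in the test above (an illustrative restatement, not a new tap-postgres API):

    # The empty breadcrumb targets the stream itself; 'view-key-properties'
    # stands in for the primary key a view cannot declare, and the replication
    # method picks the sync strategy for this run.
    replication_md = [{
        "breadcrumb": [],
        "metadata": {
            "replication-method": "FULL_TABLE",   # INCREMENTAL / LOG_BASED in the later view tests
            "replication-key": None,              # a column name when INCREMENTAL
            "view-key-properties": ["id"],        # chicken_view keys on the joined id column
        },
    }]
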
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
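
The updated_at column created above is what the incremental view test keys on: the value inserted for rec_1 further down is later reported back as the stream's bookmark, serialized as an ISO 8601 UTC timestamp. A small sketch of that expectation, assuming the session stores and returns the timestamptz value as UTC (bookmark_source and expected_bookmark are illustrative names, not part of the test):

    import datetime

    # The literal inserted below; with a UTC session it comes back as
    # '2111-01-01T12:12:12.222111+00:00', which is both the record's
    # updated_at value and the saved replication_key_value.
    bookmark_source = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111)
    expected_bookmark = bookmark_source.strftime("%Y-%m-%dT%H:%M:%S.%f") + "+00:00"
    assert expected_bookmark == '2111-01-01T12:12:12.222111+00:00'
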
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
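
One detail worth spelling out from the expected_schemas block above: the bounds on our_decimal are derived from the NUMERIC(12,2) declaration via NUMERIC_PRECISION and NUMERIC_SCALE. A worked example of that arithmetic (variable names here are illustrative only):

    import decimal

    NUMERIC_PRECISION, NUMERIC_SCALE = 12, 2
    multiple_of = decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE)))  # Decimal('0.01'), the step size
    maximum = 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE)            # 10000000000, exclusiveMaximum
    minimum = -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE)           # -10000000000, exclusiveMinimum
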
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
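
A sketch of the per-field pattern that note describes, pulled out into a helper for clarity (assert_record_matches is an illustrative name; the test below inlines the same loop):

    # Comparing field by field inside subTest() reports each mismatched column
    # on its own instead of one assertDictEqual diff dominated by the huge
    # CHAR/TEXT values in these records.
    def assert_record_matches(self, expected, actual):
        for field, expected_value in expected.items():
            with self.subTest(field=field):
                self.assertEqual(expected_value, actual.get(field, "MISSING FIELD"))
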
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without errors and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the tap_stream_id is in the {database}-{schema}-{table} format for each stream. - - Verify the catalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available.
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the tap_stream_id is in the {database}-{schema}-{table} format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the catalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
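# ---------------------------------------------------------------------------
# Illustrative sketch only (NOT captured tap output): the annotated-schema
# metadata walked above is a list of breadcrumb/metadata pairs shaped roughly
# like the literal below. Field names and sql-datatypes mirror
# expected_schema_types(); the exact keys and values here are assumptions for
# orientation, not fixtures used by this test.
example_stream_metadata = [
    {"breadcrumb": [],
     "metadata": {"table-key-properties": ["id"], "row-count": 500,
                  "is-view": False, "schema-name": "public",
                  "database-name": "discovery1"}},
    {"breadcrumb": ["properties", "id"],
     "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
    {"breadcrumb": ["properties", "our_varchar"],
     "metadata": {"inclusion": "available", "sql-datatype": "character varying"}},
    {"breadcrumb": ["properties", "invalid_box"],
     "metadata": {"inclusion": "unsupported", "sql-datatype": "box"}},
]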
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
- menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
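# Bookkeeping sketch (illustrative only, not executed by this test): which
# primary keys should survive to the third FULL_TABLE sync after the
# manipulations listed above. Rows 1-3 already exist, rows 4-6 are inserted
# below, and rows 2 and 6 are deleted before the sync runs.
surviving_ids = ({1, 2, 3} | {4, 5, 6}) - {2, 6}
assert surviving_ids == {1, 3, 4, 5}  # 4 records, matching the sync-3 assertions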
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
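# The db_utils.update_record helper called just below is defined in
# db_utils.py, which is not part of this patch. A minimal, hypothetical
# stand-in (an assumption based purely on how the helper is called here,
# not its actual implementation) could look like:
def _update_record_sketch(cursor, canon_table_name, record_pk, data):
    # build '"col" = %s' assignments, quoting mixed-case column names like "OUR TS TZ"
    assignments = ", ".join(
        "{} = %s".format(quote_ident(column, cursor)) for column in data
    )
    update_sql = "UPDATE {} SET {} WHERE id = %s".format(canon_table_name, assignments)
    cursor.execute(update_sql, list(data.values()) + [record_pk])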
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
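One note, before the discovery checks, on the timestamp expectations built in setUp(): fixture datetimes are localized with pytz and only converted to UTC when the expected string is rendered (expected_ts_tz above), so the expected values stay correct on either side of a daylight-saving boundary. A standalone sketch of that rendering, using the record-1 fixture value and nothing from the test harness:

import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
# 1977-03-03 falls outside DST, so America/New_York is UTC-5 here
local_ts = nyc_tz.localize(datetime.datetime(1977, 3, 3, 3, 3, 3, 733184))
utc_ts = local_ts.astimezone(pytz.utc)
print(datetime.datetime.strftime(utc_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00"))
# -> 1977-03-03T08:03:03.733184+00:00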
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
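Taken together, these assertions pin down the state an incremental sync leaves behind. A sketch of the structure they imply (the version number is an illustrative placeholder, it simply has to equal the sync's table_version; the key value is record 3's 'OUR TS TZ' as rendered by expected_ts_tz):

state = {
    'currently_syncing': None,
    'bookmarks': {
        'dev-public-postgres_incremental_replication_test': {
            'version': 1617540000000,  # illustrative; must equal the table_version above
            'replication_key': 'OUR TS TZ',
            'replication_key_value': '1997-02-02T07:02:02.722184+00:00',
            # no 'lsn' entry: an lsn is only bookmarked for log-based replication
        },
    },
}

The replication_key_value is the highest replication-key value replicated so far, and it is where the next sync resumes.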
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
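The checks that follow all fall out of one rule: once a bookmark exists, an incremental sync selects only rows whose replication-key value is at or above it, ordered ascending. A simplified sketch of that selection (an assumption about the general approach, with an illustrative variable name, not the tap's literal SQL), using the bookmark left by sync 1:

incremental_query = '''
    SELECT *
      FROM "public"."postgres_incremental_replication_test"
     WHERE "OUR TS TZ" >= %(bookmark)s
     ORDER BY "OUR TS TZ" ASC
'''
bookmark = '1997-02-02T07:02:02.722184+00:00'  # record 3's value, saved by sync 1

Applied to the manipulations above: id 4 (1996 key) and the id 2 update (moved down to 1990) fall below the bookmark and are skipped, id 5 was deleted before the sync ran, and ids 3 (the bookmarked row itself, re-selected because the comparison is inclusive), 1 (updated to 2021) and 6 (2111) come through in ascending key order.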
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
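A single upsert is expected here because the sync-2 bookmark is record 6's 'OUR TS TZ', the largest key left in the table, and the comparison is inclusive, so only that row qualifies again; deleting record 1 beforehand produces no message, since key-based incremental replication cannot observe deletes. For values rendered in this fixed '+00:00' form, string order matches chronological order, which is also what the ordering assertions rely on:

bookmark = '2111-01-01T17:12:12.222111+00:00'  # record 6 as rendered by expected_ts_tz, assuming EST (UTC-5)
earlier = '1997-02-02T07:02:02.722184+00:00'   # record 3
assert earlier < bookmark    # below the bookmark, so not selected again
assert bookmark >= bookmark  # the bookmarked row itself always qualifies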
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
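The repeated inserts that follow reuse the same bookkeeping: each dict passed to insert_record doubles as the expected record once the serial primary key and the replication metadata column are filled in. A minimal standalone sketch of that convention (the helper name is illustrative only, not part of the test harness):

def expected_from_insert(record, assigned_id):
    # A freshly inserted row comes back from logical replication with its
    # serial primary key and an _sdc_deleted_at column that stays None
    # until the row is deleted.
    expected = dict(record)
    expected['id'] = assigned_id
    expected['_sdc_deleted_at'] = None
    return expected

cows_rec_3 = {'cow_name': "cindy cow", 'cow_age': 10}
assert expected_from_insert(cows_rec_3, 3) == {
    'cow_name': "cindy cow", 'cow_age': 10, 'id': 3, '_sdc_deleted_at': None}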
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
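Each follow-up sync must leave the bookmark at the same or a later LSN while table_version stays fixed, which is why the assertions above compare lsn_5 against lsn_4. The stored lsn is treated as directly comparable; if an LSN instead arrives in Postgres' textual 'X/Y' form, it can be normalized before such a comparison. A small sketch of that conversion (an assumption for illustration, not part of the test harness):

def lsn_to_int(lsn_text):
    # Postgres prints an LSN as two hex fields, e.g. '0/16B3748': the part
    # before the slash is the high 32 bits, the part after is the low 32 bits.
    high, low = lsn_text.split('/')
    return (int(high, 16) << 32) + int(low, 16)

assert lsn_to_int('0/16B3748') < lsn_to_int('1/0')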
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
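The bookmark assertions above only require each new LSN to be greater than or equal to the previous one, since a sync that consumes no additional WAL may legitimately report the same position. A minimal sketch, not part of the test suite, of how a textual Postgres LSN such as '0/16B3748' converts to the integer form these comparisons assume (standard high/low hexadecimal notation):

    def lsn_to_int(lsn):
        # the two hex halves combine into one 64-bit WAL position
        hi, lo = lsn.split('/')
        return (int(hi, 16) << 32) | int(lo, 16)

    assert lsn_to_int('0/16B3750') >= lsn_to_int('0/16B3748')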
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
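The UPDATE issued above deliberately writes values that JSON cannot represent: NUMERIC 'NaN' plus floating-point 'NaN' and '+Infinity'. That is presumably why the expected record in the next hunk maps our_decimal, our_real, and our_double to None, while our_money ('$56.811') simply comes back rounded to cents as '$56.81' by the money type. A small, self-contained illustration of the JSON limitation (not part of the test):

    import json

    for bad in (float('nan'), float('inf')):
        try:
            json.dumps(bad, allow_nan=False)
        except ValueError:
            print('{} has no strict-JSON representation'.format(bad))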
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
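Because a view exposes no primary key of its own, discovery reports an empty 'table-key-properties' (asserted above) and the test injects the key at selection time through 'view-key-properties'. A hedged sketch, reusing the field names from the test, of a helper that builds that selection metadata; the helper itself is illustrative and not part of the suite:

    def view_replication_md(view_key_properties, method='FULL_TABLE', replication_key=None):
        # mirrors the replication_md literal passed to select_catalog_and_fields_via_metadata above
        return [{
            'breadcrumb': [],
            'metadata': {
                'replication-key': replication_key,
                'replication-method': method,
                'view-key-properties': view_key_properties,
            },
        }]

    print(view_replication_md(['id']))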
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
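# Illustrative aside, not part of the original fixture dict: the expected string for
# the maximum timestamp above follows from the same strftime pattern used by the
# expected_ts helpers elsewhere in these tests, e.g.
#     datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
#         .strftime("%Y-%m-%dT%H:%M:%S.%f+00:00")
#     == '9999-12-31T23:59:59.999999+00:00'
# while TIME stops at 23:59:59.999999 because, per the TODO above, '24:00:00'
# does not round-trip as a distinct value.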
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
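# Hedged illustration, not part of the original test: the note above is why the checks
# below go field by field inside subTest rather than using assertDictEqual. Factored
# into a helper (the name is an assumption for this sketch only), the same idea looks
# like this; the loops that follow simply inline it:
def _assert_record_fields(case, expected_record, actual_data):
    """Compare one expected record to a synced message's data, one field at a time."""
    for field, expected_value in expected_record.items():
        with case.subTest(field=field):
            case.assertEqual(expected_value, actual_data.get(field, "MISSING FIELD"))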
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
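# Hedged illustration, not part of the original test: the comprehensions above assume
# each per-field metadata entry has roughly the shape below (field name in
# breadcrumb[1], inclusion and sql-datatype under 'metadata'), which is also why the
# assertions that follow can compare plain sets of field names.
_example_field_metadata = {
    "breadcrumb": ["properties", "id"],
    "metadata": {"inclusion": "automatic", "sql-datatype": "integer"},
}
assert _example_field_metadata["breadcrumb"][1] == "id"
assert _example_field_metadata["metadata"]["inclusion"] == "automatic"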
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
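# Hedged aside, not part of the original test: rec_1 above mixes two input styles for
# array columns -- Postgres array-literal strings (e.g. '{{1,2,3},{4,5,6}}'), which the
# server parses itself, and Python lists of psycopg2.extras.Json values, which depend on
# the ::json[]/::jsonb[] casts that insert_record adds to its placeholders. The names
# below are illustrative only and nothing here touches the database:
_literal_style_param = '{{10,20,30},{40,50,60}}'               # sent as text, parsed by Postgres
_adapted_style_param = [psycopg2.extras.Json({'secret': 55})]  # adapted by psycopg2, needs %s::json[]
_sketch_sql = "SELECT %s::smallint[], %s::json[]"              # hypothetical query showing the two styles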
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
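Record 3 above documents that NaN and +/-Infinity are replicated as NULL because wal2json cannot represent them. A small sketch of that normalization, using a hypothetical null_if_not_finite helper rather than anything in tap-postgres itself:

import decimal
import math

def null_if_not_finite(value):
    # Non-finite numerics (NaN, +Inf, -Inf) are replaced with None, matching
    # the expectations written for record 3 above.
    if isinstance(value, decimal.Decimal):
        return value if value.is_finite() else None
    if isinstance(value, float):
        return value if math.isfinite(value) else None
    return value

assert null_if_not_finite(decimal.Decimal('NaN')) is None
assert null_if_not_finite(float('-inf')) is None
assert null_if_not_finite(decimal.Decimal('1.1')) == decimal.Decimal('1.1')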
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
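The fixture inserted just below supplies our_money as '$0.98789' and expects '$0.99' back, while record 1 earlier sent '100.1122' and expects '$100.11'. A worked example of that cents rounding, assuming default dollar-style lc_monetary formatting and a hypothetical expected_money helper:

from decimal import Decimal, ROUND_HALF_UP

def expected_money(value):
    # Round to cents and render with a dollar sign and thousands separators,
    # matching the strings the expected records use.
    cents = Decimal(value).quantize(Decimal("0.01"), rounding=ROUND_HALF_UP)
    return "${:,.2f}".format(cents)

assert expected_money("0.98789") == "$0.99"
assert expected_money("100.1122") == "$100.11"
assert expected_money("1445.5678") == "$1,445.57"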
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
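The order-of-operations note above can be hard to track, so here is a tiny simulation, purely illustrative and not tap code, of why sync 3 replicates exactly four rows: a full-table sync simply returns whatever rows survive, so deleted rows vanish and updated rows carry their new values.

# Rows keyed by id; the values stand in for record contents.
table = {1: "original", 2: "original", 3: "original"}   # state after syncs 1 and 2
table.update({4: "new", 5: "new", 6: "new"})            # inserts prior to sync 3
table[1] = "updated"
table[5] = "updated"                                    # updates prior to sync 3
del table[2]
del table[6]                                            # deletes prior to sync 3
assert sorted(table) == [1, 3, 4, 5]                    # the four rows sync 3 upserts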
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
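The next sync's assertions hinge on the incremental selection rule: only rows whose replication-key value is greater than or equal to the saved bookmark are re-synced, emitted in ascending key order. A minimal sketch of that rule, not tap-postgres internals; incremental_rows and the sample rows are hypothetical:

def incremental_rows(rows, replication_key, bookmark_value):
    # Keep rows at or past the bookmark, then emit them in ascending key order.
    due = [row for row in rows
           if bookmark_value is None or row[replication_key] >= bookmark_value]
    return sorted(due, key=lambda row: row[replication_key])

rows = [
    {"id": 1, "OUR TS TZ": "2021-04-04T08:04:04.733184+00:00"},  # updated past the bookmark
    {"id": 2, "OUR TS TZ": "1990-04-04T08:04:04.733184+00:00"},  # updated below the bookmark
    {"id": 3, "OUR TS TZ": "1997-02-02T07:02:02.722184+00:00"},  # the bookmarked record
]
picked = incremental_rows(rows, "OUR TS TZ", "1997-02-02T07:02:02.722184+00:00")
assert [row["id"] for row in picked] == [3, 1]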
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
-
-        # verify that persisted streams have the correct properties
-        chicken_catalog = found_catalogs[0]
-
-        self.assertEqual('chicken_view', chicken_catalog['stream_name'])
-        print("discovered streams are correct")
-
-        print('checking discovered metadata for ROOT-CHICKEN_VIEW')
-        md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata']
-
-        self.assertEqual(
-            {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []},
-             ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True},
-             ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
-             ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True},
-             ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
-             ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}},
-            metadata.to_map(md))
-
-
-        # 'ID' selected as view-key-properties
-        replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}]
-
-        connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog,
-                                                           menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']),
-                                                           replication_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-
-        # verify tap and target exit codes
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        record_count_by_stream = runner.examine_target_output_file(self,
-                                                                   conn_id,
-                                                                   self.expected_sync_streams(),
-                                                                   self.expected_pks())
-
-
-        self.assertEqual(record_count_by_stream, { 'chicken_view': 1})
-        records_by_stream = runner.get_records_from_target_output()
-
-        table_version = records_by_stream['chicken_view']['table_version']
-        self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version')
-        self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert')
-        self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version')
-
-        # verifications about individual records
-        for stream, recs in records_by_stream.items():
-            # verify the persisted schema was correct
-            self.assertEqual(recs['schema'],
-                             expected_schemas[stream],
-                             msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))
-
-        actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data']
-
-        expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'}
-        self.assertEqual(actual_chicken_record,
-                         expected_chicken_record,
-                         msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record))
-
-        print("records are correct")
-
-        # verify state and bookmarks
-        state = menagerie.get_state(conn_id)
-
-        chicken_bookmark = state['bookmarks']['postgres-public-chicken_view']
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-        self.assertEqual(chicken_bookmark['version'], table_version,
-                         msg="expected bookmark for stream ROOT-CHICKEN to match version")
-
-
-SCENARIOS.add(PostgresViewsFullTable)
-import os
-import datetime
-import unittest
-
-import psycopg2.extras
-from psycopg2.extensions import quote_ident
-from singer import metadata
-from tap_tester.scenario import (SCENARIOS)
-import tap_tester.connections as connections
-import tap_tester.menagerie as menagerie
-import tap_tester.runner as runner
-
-import db_utils  # pylint: disable=import-error
-
-
-expected_schemas = {'chicken_view':
-                    {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'],
-                                              'minimum': -9223372036854775808},
-                                    'size': {'type': ['null', 'string']},
-                                    'name': {'type': ['null', 'string']},
-                                    'id': {'maximum': 2147483647, 'type': ['null', 'integer'],
-                                           'minimum': -2147483648},
-                                    'age': {'maximum': 2147483647, 'type': ['null', 'integer'],
-                                            'minimum': -2147483648},
-                                    'updated_at': {'format': 'date-time',
-                                                   'type': ['null', 'string']}},
-                     'type': 'object',
-                     'definitions' : {
-                         'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}},
-                         'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}},
-                         'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}},
-                         'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}},
-                         'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}},
-                         'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}
-                     }}}
-
-def canonicalized_table_name(schema, table, cur):
-    return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur))
-
-def insert_record(cursor, table_name, data):
-    our_keys = list(data.keys())
-    our_keys.sort()
-    our_values = [data.get(key) for key in our_keys]
-
-
-    columns_sql = ", \n ".join(our_keys)
-    value_sql = ",".join(["%s" for i in range(len(our_keys))])
-
-    insert_sql = """ INSERT INTO {}
-                     ( {} )
-                     VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql)
-    cursor.execute(insert_sql, our_values)
-
-
-
-test_schema_name = "public"
-test_table_name_1 = "postgres_views_full_table_replication_test"
-test_table_name_2 = "postgres_views_full_table_replication_test_2"
-test_view = 'chicken_view'
-
-class PostgresViewsIncrementalReplication(unittest.TestCase):
-    def setUp(self):
-        db_utils.ensure_environment_variables_set()
-
-        db_utils.ensure_db()
-
-        self.maxDiff = None
-
-        with db_utils.get_test_connection() as conn:
-            conn.autocommit = True
-            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-                for table in [test_table_name_1, test_table_name_2]:
-                    old_table = cur.execute("""SELECT EXISTS (
-                                                 SELECT 1
-                                                 FROM information_schema.tables
-                                                 WHERE table_schema = %s
-                                                 AND table_name = %s)""",
-                                            [test_schema_name, table])
-                    old_table = cur.fetchone()[0]
-                    if old_table:
-                        cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur)))
-
-
-                cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur)))
-                cur.execute("""CREATE TABLE {}
-                               (id SERIAL PRIMARY KEY,
-                                updated_at TIMESTAMP WITH TIME ZONE,
-                                name VARCHAR,
-                                size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur)))
-
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
-    def get_properties():
-        return {'host' : os.getenv('TAP_POSTGRES_HOST'),
-                'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
-                'port' : os.getenv('TAP_POSTGRES_PORT'),
-                'user' : os.getenv('TAP_POSTGRES_USER'),
-                'default_replication_method' : 'FULL_TABLE'
-        }
-
-    def test_run(self):
-        conn_id = connections.ensure_connection(self)
-
-        # run in check mode
-        check_job_name = runner.run_check_mode(self, conn_id)
-
-        # verify check exit codes
-        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
-        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
-
-        # verify the tap discovered the right streams
-        found_catalogs = [fc for fc
-                          in menagerie.get_catalogs(conn_id)
-                          if fc['tap_stream_id'] in self.expected_check_streams()]
-
-        self.assertEqual(len(found_catalogs),
-                         1,
-                         msg="unable to locate schemas for connection {}".format(conn_id))
-
-        found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs))
-        diff = self.expected_check_streams().symmetric_difference(found_catalog_names)
-        self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
-
-        # verify that persisted streams have the correct properties
-        chicken_catalog = found_catalogs[0]
-
-        self.assertEqual('chicken_view', chicken_catalog['stream_name'])
-        print("discovered streams are correct")
-
-        print('checking discovered metadata for ROOT-CHICKEN_VIEW')
-        md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata']
-
-        self.assertEqual(
-            {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []},
-             ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True},
-             ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
-             ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True},
-             ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
-             ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}},
-            metadata.to_map(md))
-
-
-        # 'ID' selected as view-key-properties
-        replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}]
-
-        connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog,
-                                                           menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']),
-                                                           replication_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-
-        # verify tap and target exit codes
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-
-        self.assertEqual(exit_status['tap_exit_status'], 1)
-        # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        record_count_by_stream = runner.examine_target_output_file(self,
-                                                                   conn_id,
-                                                                   self.expected_sync_streams(),
-                                                                   self.expected_pks())
-
-        self.assertEqual(record_count_by_stream, {})
-        print("records are correct")
-
-        # verify state and bookmarks
-        state = menagerie.get_state(conn_id)
-        self.assertEqual(state, {}, msg="expected state to be empty")
-
-
-
-
-SCENARIOS.add(PostgresViewsLogicalReplication)
-import os
-import decimal
-import unittest
-import datetime
-import uuid
-import json
-
-import psycopg2.extras
-from psycopg2.extensions import quote_ident
-import pytz
-from tap_tester.scenario import (SCENARIOS)
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without errors and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the catalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available.
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the catalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id,
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting...
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after various manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the third sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert -
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
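The first-sync assertions above lean on two conventions: the test's expected_ts / expected_ts_tz helpers normalize every timestamp to a UTC ISO-8601 string, and the saved bookmark carries the replication-key value of the last record emitted, which the later syncs in this test treat as an inclusive lower bound. Below is a minimal standalone sketch of how those expected values line up; the three timestamps mirror the fixture records, the helper is a copy of the test's static method, and everything else (names, the assert, the print) is purely illustrative:

import datetime
import pytz

def expected_ts_tz(our_ts_tz):
    # normalize an aware timestamp to the UTC ISO-8601 form used in the expected records
    our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc)
    return datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00")

nyc_tz = pytz.timezone('America/New_York')
replication_key_values = [
    expected_ts_tz(nyc_tz.localize(datetime.datetime(1977, 3, 3, 3, 3, 3, 733184))),
    expected_ts_tz(nyc_tz.localize(datetime.datetime(1987, 2, 2, 2, 2, 2, 722184))),
    expected_ts_tz(nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))),
]

# records are emitted in ascending replication-key order, so the bookmark is the last value
assert replication_key_values == sorted(replication_key_values)
bookmark_value = replication_key_values[-1]
print(bookmark_value)  # -> 1997-02-02T07:02:02.722184+00:00 (America/New_York is UTC-5 in February)

Because every normalized value shares the same fixed-width format, the string comparisons in the assertLess calls above are equivalent to comparing the underlying timestamps.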
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True
-            with conn.cursor() as cur:
-                #insert another chicken
-                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
-                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
-
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-
-        # verify tap and target exit codes
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        record_count_by_stream = runner.examine_target_output_file(self,
-                                                                   conn_id,
-                                                                   self.expected_sync_streams(),
-                                                                   self.expected_pks())
-        self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1})
-
-        upserts = []
-        for u in runner.get_upserts_from_target_output():
-            self.assertIsNotNone(u.get('_sdc_lsn'))
-            del u['_sdc_lsn']
-            upserts.append(u)
-
-        self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'},
-                          {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}],
-                         upserts)
-
-        print("inserted records are correct")
-
-        state = menagerie.get_state(conn_id)
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-        cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
-        self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream dev-public-postgres_logical_replication_test_cows to have an lsn")
-        lsn_cows_2 = cows_bookmark['lsn']
-        self.assertTrue(lsn_cows_2 >= lsn_cows_1)
-
-        chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens']
-        self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream postgres-public-postgres_logical_replication_test_chickens to have an lsn")
-        lsn_chickens_2 = chickens_bookmark['lsn']
-        self.assertTrue(lsn_chickens_2 >= lsn_chickens_1)
-
-        #table_version does NOT change
-        self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream to match version")
-
-        #table_version does NOT change
-        self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream to match version")
-
-
-
-SCENARIOS.add(PostgresLogicalRepMultipleDBs)
-import os
-import unittest
-
-import psycopg2.extras
-from psycopg2.extensions import quote_ident
-from tap_tester.scenario import (SCENARIOS)
-import tap_tester.connections as connections
-import tap_tester.menagerie as menagerie
-import tap_tester.runner as runner
-
-import db_utils # pylint: disable=import-error
-
-
-expected_schemas = {'postgres_logical_replication_test_cows':
-                    {'type': 'object',
-                     'selected': True,
-                     'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'},
-                                    'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True},
-                                    'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}},
-
-                    'postgres_logical_replication_test_chickens':
-                    {'type': 'object',
-                     'selected': True,
-                     'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'},
-                                    'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True},
-                                    'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}}
-
-
-def insert_record(cursor,
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
-        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
-        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
-
-        # verify state and bookmarks
-        state = menagerie.get_state(conn_id)
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-
-        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
-        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
-        lsn_cows_1 = bookmark_cows['lsn']
-        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
-
-        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
-        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
-        lsn_chickens_1 = bookmark_chickens['lsn']
-        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
-
-
-        #----------------------------------------------------------------------
-        # invoke the sync job again after adding records
-        #----------------------------------------------------------------------
-        print("inserting 2 more cows and 2 more chickens")
-
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor() as cur:
-                # insert another cow
-                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
-                insert_record(cur, test_table_name_cows, self.cows_rec_2)
-                # update that cow's expected values
-                self.cows_rec_2['id'] = 2
-                self.cows_rec_2['_sdc_deleted_at'] = None
-
-                # insert another chicken
-                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
-                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
-                # update that chicken's expected values
-                self.chicken_rec_2['id'] = 2
-                self.chicken_rec_2['_sdc_deleted_at'] = None
-
-                # and repeat...
-
-                self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10}
-                insert_record(cur, test_table_name_cows, self.cows_rec_3)
-                self.cows_rec_3['id'] = 3
-                self.cows_rec_3['_sdc_deleted_at'] = None
-
-
-                self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4}
-                insert_record(cur, test_table_name_chickens, self.chicken_rec_3)
-                self.chicken_rec_3['id'] = 3
-                self.chicken_rec_3['_sdc_deleted_at'] = None
-
-
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-
-        # verify tap and target exit codes
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        record_count_by_stream = runner.examine_target_output_file(self,
-                                                                   conn_id,
-                                                                   self.expected_sync_streams(),
-                                                                   self.expected_pks())
-        self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2})
-        records_by_stream = runner.get_records_from_target_output()
-        chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages']
-        cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages']
-
-        self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data'])
-        self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data'])
-        self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data'])
-        self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data'])
-
-        print("inserted records are correct")
-
-        state = menagerie.get_state(conn_id)
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-        cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
-        self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream dev-public-postgres_logical_replication_test_cows to have an lsn")
-        lsn_cows_2 = cows_bookmark['lsn']
-        self.assertTrue(lsn_cows_2 >= lsn_cows_1)
-
-        chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
-        self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream dev-public-postgres_logical_replication_test_chickens to have an lsn")
-        lsn_chickens_2 = chickens_bookmark['lsn']
-        self.assertTrue(lsn_chickens_2 >= lsn_chickens_1)
-
-        #table_version does NOT change
-        self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream to match version")
-
-        #table_version does NOT change
-        self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream to match version")
-
-
-
-SCENARIOS.add(PostgresLogicalRepMultipleTables)
-import os
-import decimal
-import unittest
-import datetime
-import uuid
-import json
-
-import pytz
-import psycopg2.extras
-from psycopg2.extensions import quote_ident
-from tap_tester.scenario import (SCENARIOS)
-import tap_tester.connections as connections
-import tap_tester.menagerie as menagerie
-import tap_tester.runner as runner
-
-import db_utils # pylint: disable=import-error
-
-
-expected_schemas = {'postgres_logical_replication_test':
-                    {'definitions' : {
-                        'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}},
-                        'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}},
-                        'sdc_recursive_string_array' : { 'type' : ['null', 'string',
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'}
-
-    @staticmethod
-    def expected_sync_streams():
-        return { 'postgres_logical_replication_test' }
-
-    @staticmethod
-    def expected_pks():
-        return {
-            'postgres_logical_replication_test' : {'id'}
-        }
-
-    @staticmethod
-    def tap_name():
-        return "tap-postgres"
-
-    @staticmethod
-    def name():
-        return "tap_tester_postgres_logical_replication"
-
-    @staticmethod
-    def get_type():
-        return "platform.postgres"
-
-    @staticmethod
-    def get_credentials():
-        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
-
-    @staticmethod
-    def get_properties():
-        return {'host' : os.getenv('TAP_POSTGRES_HOST'),
-                'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
-                'port' : os.getenv('TAP_POSTGRES_PORT'),
-                'user' : os.getenv('TAP_POSTGRES_USER'),
-                'default_replication_method' : 'LOG_BASED',
-                'logical_poll_total_seconds': '10',
-                'wal2json_message_format': '1'
-                }
-
-
-    def test_run(self):
-        conn_id = connections.ensure_connection(self)
-
-        # run in check mode
-        check_job_name = runner.run_check_mode(self, conn_id)
-
-        # verify check exit codes
-        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
-        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
-
-        # verify the tap discovered the right streams
-        found_catalogs = [fc for fc
-                          in menagerie.get_catalogs(conn_id)
-                          if fc['tap_stream_id'] in self.expected_check_streams()]
-
-
-        self.assertGreaterEqual(len(found_catalogs),
-                                1,
-                                msg="unable to locate schemas for connection {}".format(conn_id))
-
-        found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs))
-        diff = self.expected_check_streams().symmetric_difference(found_catalog_names)
-        self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
-
-        # verify that persisted streams have the correct properties
-        test_catalog = found_catalogs[0]
-
-        self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name'])
-
-        print("discovered streams are correct")
-
-        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}]
-        # don't select our_text_2
-        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog,
-                                                               menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']),
-                                                               additional_md,
-                                                               ['our_text_2'])
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-
-        # verify tap and target exit codes
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        record_count_by_stream = runner.examine_target_output_file(self,
-                                                                   conn_id,
-                                                                   self.expected_sync_streams(),
-                                                                   self.expected_pks())
-
-
-        self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4})
-        records_by_stream = runner.get_records_from_target_output()
-
-        table_version = records_by_stream['postgres_logical_replication_test']['table_version']
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'],
-                         'activate_version')
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'],
-                         'upsert')
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'],
-                         'upsert')
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'],
-                         'upsert')
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'],
-
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
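# Illustrative sketch, not part of the original patch: the fixtures above insert
# hstore columns as 'key=>"value"' literals (e.g. 'jumps=>"high",name=>"betty"'),
# and the expected records assert the synced data comes back as plain dicts.
# A minimal mapping for the simple literals used here (no embedded commas or
# quotes); the tap itself relies on proper hstore handling, this is only to make
# the expectation concrete:
def hstore_literal_to_dict(literal):
    result = {}
    for pair in literal.split(','):
        key, _, value = pair.partition('=>')
        result[key.strip().strip('"')] = value.strip().strip('"')
    return result

assert hstore_literal_to_dict('jumps=>"high",name=>"betty"') == {'jumps': 'high', 'name': 'betty'}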
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
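# Illustrative sketch, not part of the original patch: the UPDATE above writes
# NaN and +Infinity into numeric/real/double columns, and the expected record
# that follows asserts those columns surface as None, since JSON has no literal
# for NaN or Infinity. A minimal expression of that expectation, assuming the
# values arrive as Python floats or Decimals:
import decimal
import math

def jsonable_number(value):
    # Non-finite numbers cannot be represented in JSON, so they are expected as None.
    if isinstance(value, (float, decimal.Decimal)) and not math.isfinite(value):
        return None
    return value

assert jsonable_number(float('nan')) is None
assert jsonable_number(decimal.Decimal('1234567890.01')) == decimal.Decimal('1234567890.01')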
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
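# Illustrative sketch, not part of the original patch: the full-table view sync
# above asserts an activate_version / upsert / activate_version message envelope.
# A small helper expressing that shape, assuming `messages` is the list taken
# from records_by_stream[stream]['messages']:
def is_full_table_envelope(messages):
    actions = [message['action'] for message in messages]
    return (len(actions) >= 2
            and actions[0] == 'activate_version'
            and actions[-1] == 'activate_version'
            and all(action == 'upsert' for action in actions[1:-1]))

assert is_full_table_envelope([
    {'action': 'activate_version'},
    {'action': 'upsert'},
    {'action': 'activate_version'},
])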
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
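# A minimal sketch (illustrative only, not part of this patch) of the per-field subTest
# comparison described above, wrapped in a helper that truncates long values in failure
# messages so output stays readable when a field holds a multi-megabyte string. The helper
# name and truncation width are assumptions, not tap-tester APIs.
import unittest

def assert_record_equal(case: unittest.TestCase, expected: dict, actual: dict, width: int = 80):
    """Compare two records field by field, truncating long values in failure output."""
    for field, expected_value in expected.items():
        with case.subTest(field=field):
            actual_value = actual.get(field, "MISSING FIELD")
            case.assertEqual(
                expected_value, actual_value,
                msg="field {!r}: {} != {}".format(
                    field, str(expected_value)[:width], str(actual_value)[:width]))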
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
-            'our_cidr': 'cidr',
-            'our_inet': 'inet',
-            'our_mac': 'macaddr',
-            'our_alignment_enum': 'alignment',
-            'our_money': 'money',
-            'invalid_bigserial': 'bigint',
-            'invalid_bit_varying': 'bit varying',
-            'invalid_box': 'box',
-            'invalid_bytea': 'bytea',
-            'invalid_circle': 'circle',
-            'invalid_interval': 'interval',
-            'invalid_line': 'line',
-            'invalid_lseg': 'lseg',
-            'invalid_path': 'path',
-            'invalid_pg_lsn': 'pg_lsn',
-            'invalid_point': 'point',
-            'invalid_polygon': 'polygon',
-            'invalid_serial': 'integer',
-            'invalid_smallserial': 'smallint',
-            'invalid_tsquery': 'tsquery',
-            'invalid_tsvector': 'tsvector',
-            'invalid_txid_snapshot': 'txid_snapshot',
-            'invalid_xml': 'xml',
-        }
-
-    @staticmethod
-    def tap_name():
-        return "tap-postgres"
-
-    @staticmethod
-    def name():
-        return "tap_tester_postgres_discovery"
-
-    @staticmethod
-    def get_type():
-        return "platform.postgres"
-
-    @staticmethod
-    def get_credentials():
-        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
-
-    def get_properties(self, original_properties=True):
-        return_value = {
-            'host' : os.getenv('TAP_POSTGRES_HOST'),
-            'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
-            'port' : os.getenv('TAP_POSTGRES_PORT'),
-            'user' : os.getenv('TAP_POSTGRES_USER'),
-            'default_replication_method' : self.FULL_TABLE,
-            'filter_dbs' : 'discovery1'
-        }
-        if not original_properties:
-            if self.default_replication_method is self.LOG_BASED:
-                return_value['wal2json_message_format'] = '1'
-
-            return_value['default_replication_method'] = self.default_replication_method
-
-        return return_value
-
-    def test_run(self):
-        """Parametrized discovery test running against each replication method."""
-
-        self.default_replication_method = self.FULL_TABLE
-        full_table_conn_id = connections.ensure_connection(self, original_properties=False)
-        self.discovery_test(full_table_conn_id)
-
-        self.default_replication_method = self.INCREMENTAL
-        incremental_conn_id = connections.ensure_connection(self, original_properties=False)
-        self.discovery_test(incremental_conn_id)
-
-        # NB | We are able to generate a connection and run discovery with a default replication
-        #      method of logical replication WITHOUT selecting a replication slot. This is not
-        #      ideal behavior. This BUG should not be carried over into hp-postgres, but will not
-        #      be fixed for this tap.
-        self.default_replication_method = self.LOG_BASED
-        log_based_conn_id = connections.ensure_connection(self, original_properties=False)
-        self.discovery_test(log_based_conn_id)
-
-    def discovery_test(self, conn_id):
-        """
-        Basic Discovery Test for a database tap.
-
-        Test Description:
-          Ensure discovery runs without errors and generates a catalog of the expected form.
-
-        Test Cases:
-        - Verify discovery generated the expected catalogs by name.
-        - Verify that the table_name is in the <database>-<schema>-<table> format for each stream.
-        - Verify the catalog is found for a given stream.
-        - Verify there is only 1 top level breadcrumb in metadata for a given stream.
-        - Verify replication key(s) match expectations for a given stream.
-        - Verify primary key(s) match expectations for a given stream.
-        - Verify the replication method matches our expectations for a given stream.
-        - Verify that only primary keys are given the inclusion of automatic in metadata
-          for a given stream.
-        - Verify expected unsupported fields are given the inclusion of unsupported in
-          metadata for a given stream.
-        - Verify that all fields for a given stream which are not unsupported or automatic
-          have inclusion of available.
-        - Verify row-count metadata matches expectations for a given stream.
-        - Verify selected metadata is None for all streams.
-        - Verify is-view metadata is False for a given stream.
-        - Verify no forced-replication-method is present in metadata for a given stream.
-        - Verify schema and db match expectations for a given stream.
-        - Verify schema types match expectations for a given stream.
-        """
-        # TODO Generate multiple tables (streams) and maybe dbs too?
-
-        # run discovery (check mode)
-        check_job_name = runner.run_check_mode(self, conn_id)
-
-        # Verify check exit codes
-        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
-        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
-
-        # Verify discovery generated a catalog
-        found_catalogs = menagerie.get_catalogs(conn_id)
-        self.assertGreater(len(found_catalogs), 0)
-
-        # Verify discovery generated the expected catalogs by name
-        found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs}
-        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
-        # Verify that the table_name is in the <database>-<schema>-<table> format for each stream
-        found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs}
-        self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids)
-
-        # Test by stream
-        for stream in self.expected_check_streams():
-            with self.subTest(stream=stream):
-
-                # Verify the catalog is found for a given stream
-                catalog = next(iter([catalog for catalog in found_catalogs
-                                     if catalog["stream_name"] == stream]))
-                self.assertTrue(isinstance(catalog, dict))
-
-                # collecting expected values
-                expected_primary_keys = self.expected_primary_keys()[stream]
-                expected_replication_keys = set()
-                expected_unsupported_fields = self.expected_unsupported_fields()
-                expected_fields_to_datatypes = self.expected_schema_types()
-                expected_row_count = len(self.recs)
-
-                # collecting actual values...
-                schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id'])
-                stream_metadata = schema_and_metadata["metadata"]
-                top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []]
-                stream_properties = top_level_metadata[0]['metadata']
-                actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, []))
-                actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, []))
-                actual_replication_method = stream_properties.get(self.REPLICATION_METHOD)
-                actual_automatic_fields = set(
-                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
-                    if item.get("metadata").get("inclusion") == "automatic"
-                )
-                actual_unsupported_fields = set(
-                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
-                    if item.get("metadata").get("inclusion") == "unsupported"
-                )
-                actual_fields_to_datatypes = {
-                    item['breadcrumb'][1]: item['metadata'].get('sql-datatype')
-                    for item in stream_metadata[1:]
-                }
-
-                # Verify there is only 1 top level breadcrumb in metadata
-                self.assertEqual(1, len(top_level_metadata))
-
-                # Verify replication key(s) match expectations
-                self.assertSetEqual(
-                    expected_replication_keys, actual_replication_keys
-                )
-
-                # NB | We expect primary keys and replication keys to have inclusion automatic for
-                #      key-based incremental replication. But that is only true for primary keys here.
-                #      This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
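# A minimal sketch of the annotated-metadata shape the parsing above relies on: a list of
# entries keyed by "breadcrumb" ([] for the stream itself, ["properties", <field>] for a
# column) and "metadata". The values below are hand-written for illustration, not captured
# from a real discovery run.
example_metadata = [
    {"breadcrumb": [], "metadata": {"table-key-properties": ["id"], "row-count": 500,
                                    "is-view": False, "schema-name": "public",
                                    "database-name": "discovery1"}},
    {"breadcrumb": ["properties", "id"],
     "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
    {"breadcrumb": ["properties", "invalid_xml"],
     "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}},
    {"breadcrumb": ["properties", "our_varchar"],
     "metadata": {"inclusion": "available", "sql-datatype": "character varying"}},
]
# Deriving the automatic-field set the same way the test does.
automatic_fields = {item["breadcrumb"][1] for item in example_metadata
                    if item["breadcrumb"] and item["metadata"].get("inclusion") == "automatic"}
assert automatic_fields == {"id"}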
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
-
-        # verify discovery produced (at least) 1 expected catalog
-        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
-                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
-        self.assertGreaterEqual(len(found_catalogs), 1)
-
-        # verify the tap discovered the expected streams
-        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
-        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
-        # verify that persisted streams have the correct properties
-        test_catalog = found_catalogs[0]
-        self.assertEqual(test_table_name, test_catalog['stream_name'])
-        print("discovered streams are correct")
-
-        # perform table selection
-        print('selecting {} and all fields within the table'.format(test_table_name))
-        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
-        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
-        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        # run sync job 1 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_1 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(3, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(5, len(messages))
-        self.assertEqual('activate_version', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('upsert', messages[3]['action'])
-        self.assertEqual('activate_version', messages[4]['action'])
-
-        # verify the persisted schema matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-        # verify replicated records match expectations
-        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
-        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
-        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
-
-        print("records are correct")
-
-        # grab bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
-
-        # verify state and bookmarks meet expectations
-        self.assertIsNone(state['currently_syncing'])
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertIsNone(bookmark.get('replication_key'))
-        self.assertIsNone(bookmark.get('replication_key_value'))
-        self.assertEqual(table_version_1, bookmark['version'])
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN and get the same 3 records
-        #----------------------------------------------------------------------
-
-        # run sync job 2 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_2 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(3, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(4, len(messages))
-        self.assertEqual('upsert', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('activate_version', messages[3]['action'])
-
-        # verify the new table version increased on the second sync
-        self.assertGreater(table_version_2, table_version_1)
-
-        # verify the persisted schema still matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-        # verify replicated records still match expectations
-        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
-        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
-        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
-
-        # grab bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
-
-        # verify state and bookmarks meet expectations
-        self.assertIsNone(state['currently_syncing'])
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertIsNone(bookmark.get('replication_key'))
-        self.assertIsNone(bookmark.get('replication_key_value'))
-        self.assertEqual(table_version_2, bookmark['version'])
-
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN following various manipulations to the data
-        #----------------------------------------------------------------------
-
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-
-                # NB | We will perform the following actions prior to the next sync:
-                #      [Action (EXPECTED RESULT)]
-
-                # Insert a record
-                # Insert a record to be updated prior to sync
-                # Insert a record to be deleted prior to sync (NOT REPLICATED)
-
-                # Update an existing record
-                # Update a newly inserted record
-
-                # Delete an existing record
-                # Delete a newly inserted record
-
-                # inserting...
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
-                # an existing record
-                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
-                record_pk = 1
-                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
-                our_ts_tz = nyc_tz.localize(our_ts)
-                updated_data = {
-                    "OUR TS TZ": our_ts_tz,
-                    "our_double": decimal.Decimal("6.6"),
-                    "our_money": "$0.00"
-                }
-                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
-                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
-                self.expected_records[0]["our_money"] = "$0.00"
-
-                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
-                # a newly inserted record
-                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
-                record_pk = 5
-                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
-                our_ts_tz = nyc_tz.localize(our_ts)
-                updated_data = {
-                    "OUR TS TZ": our_ts_tz,
-                    "our_double": decimal.Decimal("6.6"),
-                    "our_money": "$0.00"
-                }
-                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
-                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
-                self.expected_records[4]["our_money"] = "$0.00"
-
-                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
-
-                # deleting
-                # an existing record
-                record_pk = 2
-                db_utils.delete_record(cur, canon_table_name, record_pk)
-
-                # a newly inserted record
-                record_pk = 6
-                db_utils.delete_record(cur, canon_table_name, record_pk)
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN after various manipulations
-        #----------------------------------------------------------------------
-
-        # run sync job 3 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_3 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(4, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(5, len(messages))
-        self.assertEqual('upsert', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('upsert', messages[3]['action'])
-        self.assertEqual('activate_version', messages[4]['action'])
-
-        # verify the new table version increased on the third sync
-        self.assertGreater(table_version_3, table_version_2)
-
-        # verify the persisted schema still matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-
-        # NB | This is a little tough to track mentally so here's a breakdown of
-        #      the order of operations by expected records indexes:
-
-        # Prior to Sync 1
-        # insert 0, 1, 2
-
-        # Prior to Sync 2
-        # No db changes
-
-        # Prior to Sync 3
-        # insert 3, 4, 5
-        # update 0, 4
-        # delete 1, 5
-
-        # Resulting Synced Records: 2, 3, 0, 4
-
-
-        # verify replicated records still match expectations
-        self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
-        self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
-
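# --- Editor's note (illustrative sketch, not part of the original patch) ------
# Small bookkeeping check of the breakdown above: after inserting expected_records
# indexes 3-5 and deleting indexes 1 and 5, the third full-table sync should carry
# exactly the surviving rows, asserted here and just below as indexes 2, 3, 0, 4
# (the two updated rows, 0 and 4, appearing last).
inserted = [0, 1, 2, 3, 4, 5]               # expected_records indexes inserted overall
deleted = {1, 5}                            # indexes deleted before sync 3
surviving = [i for i in inserted if i not in deleted]
assert surviving == sorted([2, 3, 0, 4])    # same rows; ordering is asserted by the test itself
# ------------------------------------------------------------------------------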
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
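# --- Editor's note (illustrative sketch, not part of the original patch) ------
# The expected_ts_tz / expected_ts helpers above build the UTC-normalized strings
# that the replicated records are compared against. A standalone example of the
# same conversion, using pytz as these tests do:
import datetime
import pytz
nyc_tz = pytz.timezone('America/New_York')
ts = nyc_tz.localize(datetime.datetime(1987, 2, 2, 2, 2, 2, 722184))
print(ts.astimezone(pytz.utc).strftime("%Y-%m-%dT%H:%M:%S.%f+00:00"))
# -> 1987-02-02T07:02:02.722184+00:00, matching the expected 'OUR TS TZ' values
# ------------------------------------------------------------------------------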
-
-        # verify discovery produced (at least) 1 expected catalog
-        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
-                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
-        self.assertGreaterEqual(len(found_catalogs), 1)
-
-        # verify the tap discovered the expected streams
-        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
-        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
-        # verify that persisted streams have the correct properties
-        test_catalog = found_catalogs[0]
-        self.assertEqual(test_table_name, test_catalog['stream_name'])
-        print("discovered streams are correct")
-
-        # perform table selection
-        print('selecting {} and all fields within the table'.format(test_table_name))
-        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
-        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
-        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        # run sync job 1 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(3, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(4, len(messages))
-        self.assertEqual('activate_version', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('upsert', messages[3]['action'])
-
-        # verify the persisted schema matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-        # verify replicated records match expectations
-        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
-        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
-        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
-
-        # verify records are in ascending order by replication-key value
-        expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
-        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
-        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
-
-        print("records are correct")
-
-        # grab bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
-
-        # verify state and bookmarks meet expectations
-        self.assertIsNone(state['currently_syncing'])
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertEqual(table_version, bookmark['version'])
-        self.assertEqual(expected_replication_key, bookmark['replication_key'])
-        self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
-
-
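# --- Editor's note (illustrative sketch, not part of the original patch) ------
# The bookmark assertions above rely on INCREMENTAL replication re-selecting only
# rows whose replication key is at or beyond the bookmarked value, in ascending
# order. Conceptually the next sync issues something like the query sketched
# below; the exact SQL shape is an assumption for illustration, not lifted from
# tap-postgres itself.
bookmarked_value = '1997-02-02T07:02:02.722184+00:00'   # expected_records[2]['OUR TS TZ']
query = ('SELECT * FROM "public"."postgres_incremental_replication_test" '
         'WHERE "OUR TS TZ" >= %s ORDER BY "OUR TS TZ" ASC')
# cur.execute(query, (bookmarked_value,))  # rows below the bookmark are never re-read
# ------------------------------------------------------------------------------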
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names)
-        self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
-
-        # verify that persisted streams have the correct properties
-        test_catalog = found_catalogs[0]
-
-        self.assertEqual(test_table_name, test_catalog['stream_name'])
-
-        print("discovered streams are correct")
-
-        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}]
-        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog,
-                                                               menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']),
-                                                               additional_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-
-        # verify tap and target exit codes
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        record_count_by_stream = runner.examine_target_output_file(self,
-                                                                   conn_id,
-                                                                   self.expected_sync_streams(),
-                                                                   self.expected_pks())
-
-
-        self.assertEqual(record_count_by_stream, { test_table_name: 0})
-        records_by_stream = runner.get_records_from_target_output()
-
-        table_version = records_by_stream[test_table_name]['table_version']
-
-        self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'],
-                         'activate_version')
-
-        self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'],
-                         'activate_version')
-
-        # verify state and bookmarks
-        state = menagerie.get_state(conn_id)
-
-
-        bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test']
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-
-        self.assertIsNotNone(bookmark['lsn'],
-                             msg="expected bookmark for stream to have an lsn")
-        lsn_1 = bookmark['lsn']
-
-        self.assertEqual(bookmark['version'], table_version,
-                         msg="expected bookmark for stream to match version")
-
-
-        #----------------------------------------------------------------------
-        # invoke the sync job again after adding a record
-        #----------------------------------------------------------------------
-        print("inserting a record")
-
-        our_ts_tz = None
-        our_date = None
-        our_uuid = str(uuid.uuid1())
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor() as cur:
-                #insert fixture data 1
-                our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
-                nyc_tz = pytz.timezone('America/New_York')
-                our_ts_tz = nyc_tz.localize(our_ts)
-                our_date = datetime.date(1998, 3, 4)
-
-                self.rec_1 = {
-                    'our_bit_array' : '{{0,1,1}}',
-                    'our_boolean_array' : '{true}',
-                    'our_cidr_array' : '{{192.168.100.128/25}}',
-                    'our_citext_array' : '{{maGICKal 2}}',
-                    'our_date_array' : '{{{}}}'.format(our_date),
-                    'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')),
-                    'our_double_array' : '{{1.232323}}',
-                    'our_enum_array' : '{{bad}}',
-                    'our_float_array' : '{{5.23}}',
-                    'our_hstore_array' : """{{"size=>small","name=>betty"}}""",
-                    'our_inet_array' : '{{192.168.100.128/24}}',
-                    'our_int_array' : '{{1,2,3},{4,5,6}}',
-                    'our_int8_array' : '{16,32,64}',
-                    'our_json_array' : [psycopg2.extras.Json({'secret' : 55})],
-                    'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})],
-                    'our_mac_array' : '{{08:00:2b:01:02:03}}',
-                    'our_money_array' : '{{$412.1234}}',
-                    'our_real_array' : '{{76.33}}',
-                    'our_smallint_array' : '{{10,20,30},{40,50,60}}',
-                    'our_string_array' : '{{one string, two strings}}',
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
-        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
-        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
-
-        # verify state and bookmarks
-        state = menagerie.get_state(conn_id)
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-
-        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
-        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
-        lsn_cows_1 = bookmark_cows['lsn']
-        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
-
-        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
-        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
-        lsn_chickens_1 = bookmark_chickens['lsn']
-        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
-
-
-        #----------------------------------------------------------------------
-        # invoke the sync job again after adding records
-        #----------------------------------------------------------------------
-        print("inserting 2 more cows and 2 more chickens")
-
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor() as cur:
-                # insert another cow
-                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
-                insert_record(cur, test_table_name_cows, self.cows_rec_2)
-                # update that cow's expected values
-                self.cows_rec_2['id'] = 2
-                self.cows_rec_2['_sdc_deleted_at'] = None
-
-                # insert another chicken
-                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
-                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
-                # update that chicken's expected values
-                self.chicken_rec_2['id'] = 2
-                self.chicken_rec_2['_sdc_deleted_at'] = None
-
-                # and repeat...
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'}
-
-    @staticmethod
-    def expected_sync_streams():
-        return { 'postgres_logical_replication_test' }
-
-    @staticmethod
-    def expected_pks():
-        return {
-            'postgres_logical_replication_test' : {'id'}
-        }
-
-    @staticmethod
-    def tap_name():
-        return "tap-postgres"
-
-    @staticmethod
-    def name():
-        return "tap_tester_postgres_logical_replication"
-
-    @staticmethod
-    def get_type():
-        return "platform.postgres"
-
-    @staticmethod
-    def get_credentials():
-        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
-
-    @staticmethod
-    def get_properties():
-        return {'host' : os.getenv('TAP_POSTGRES_HOST'),
-                'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
-                'port' : os.getenv('TAP_POSTGRES_PORT'),
-                'user' : os.getenv('TAP_POSTGRES_USER'),
-                'default_replication_method' : 'LOG_BASED',
-                'logical_poll_total_seconds': '10',
-                'wal2json_message_format': '1'
-                }
-
-
-    def test_run(self):
-        conn_id = connections.ensure_connection(self)
-
-        # run in check mode
-        check_job_name = runner.run_check_mode(self, conn_id)
-
-        # verify check exit codes
-        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
-        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
-
-        # verify the tap discovered the right streams
-        found_catalogs = [fc for fc
-                          in menagerie.get_catalogs(conn_id)
-                          if fc['tap_stream_id'] in self.expected_check_streams()]
-
-
-        self.assertGreaterEqual(len(found_catalogs),
-                                1,
-                                msg="unable to locate schemas for connection {}".format(conn_id))
-
-        found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs))
-        diff = self.expected_check_streams().symmetric_difference(found_catalog_names)
-        self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
-
-        # verify that persisted streams have the correct properties
-        test_catalog = found_catalogs[0]
-
-        self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name'])
-
-        print("discovered streams are correct")
-
-        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}]
-        #don't select our_text_2
-        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog,
-                                                               menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']),
-                                                               additional_md,
-                                                               ['our_text_2'])
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-
-        # verify tap and target exit codes
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        record_count_by_stream = runner.examine_target_output_file(self,
-                                                                   conn_id,
-                                                                   self.expected_sync_streams(),
-                                                                   self.expected_pks())
-
-
-        self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4})
-        records_by_stream = runner.get_records_from_target_output()
-
-        table_version = records_by_stream['postgres_logical_replication_test']['table_version']
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'],
-                         'activate_version')
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'],
-                         'upsert')
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'],
-                         'upsert')
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'],
-                         'upsert')
-
-        self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'],
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
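# The delete scenarios above all verify the same shape: with log-based
# replication a DELETE arrives as an 'upsert' message whose data carries a
# non-null _sdc_deleted_at plus the deleted row's id.  A minimal sketch of
# that check as a helper -- assert_delete_message is a hypothetical name,
# not something defined in these tests:

def assert_delete_message(test, message, expected_id):
    """Check that a logical-replication delete surfaced as a soft-delete upsert."""
    test.assertEqual(message['action'], 'upsert')
    test.assertIsNotNone(message['data'].get('_sdc_deleted_at'))
    test.assertEqual(message['data']['id'], expected_id)

# e.g. the three assertion blocks above would reduce to:
#   assert_delete_message(self, messages[0], 2)
#   assert_delete_message(self, messages[1], 4)
#   assert_delete_message(self, messages[2], 5)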
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
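# The expected record above encodes how values inserted in setUp (rec_3) come
# back out of the tap.  A few of the conversions the assertions depend on,
# summarized as (inserted value, emitted value) pairs -- this dict is only an
# illustration built from the fixtures already used in this test, not tap code:

import decimal

value_normalization_examples = {
    'our_real':  (6.6, decimal.Decimal('6.6')),              # REAL emitted as a Decimal
    'our_bit':   ('1', True),                                 # BIT(1) emitted as a boolean
    'our_inet':  ('192.168.102.128/32', '192.168.102.128'),   # /32 inet emitted host-only
    'our_money': ('$412.1234', '$412.12'),                    # money rounded to cents
    # timestamps inserted as America/New_York localized datetimes are emitted
    # as UTC ISO 8601 strings, e.g. '1993-03-03T08:03:03.333333+00:00'
}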
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
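# The UPDATE above deliberately writes non-finite numerics ('NaN',
# '+Infinity') and an over-precise money value ('$56.811'); the expected
# record that follows treats the non-finite values as None and the money
# value as rounded to cents.  A sketch of the None mapping the assertions
# assume -- normalize_numeric is an illustrative helper, not the tap's
# actual implementation:

import math

def normalize_numeric(value):
    """Non-finite floats cannot be represented in JSON, so expect None."""
    if isinstance(value, float) and not math.isfinite(value):
        return None
    return value

assert normalize_numeric(float('nan')) is None
assert normalize_numeric(float('inf')) is None
assert normalize_numeric(1.5) == 1.5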
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
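# The single upsert asserted above is the one row the chicken_view join can
# produce from the two fixture rows inserted in setUp.  Querying the view
# directly shows the same shape the tap is expected to emit -- this snippet
# only reuses the db_utils helper these tests already import and is not part
# of the test itself:

import psycopg2.extras
import db_utils  # pylint: disable=import-error

with db_utils.get_test_connection() as conn:
    with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        cur.execute("SELECT id, fk_id, name, age, size FROM chicken_view")
        rows = [dict(row) for row in cur.fetchall()]
        # expected: [{'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size': 'big'}]
        print(rows)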
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
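# NB | For illustration (derived from the NUMERIC_PRECISION/NUMERIC_SCALE constants above,
#     not from tap output): the numeric bounds in expected_schemas work out to
#
#         multipleOf = 10 ** (0 - 2)    == 0.01
#         maximum    =  10 ** (12 - 2)  ==  10_000_000_000   (exclusive)
#         minimum    = -10 ** (12 - 2)  == -10_000_000_000   (exclusive)
#
#     i.e. NUMERIC(12,2) keeps two decimal places and leaves ten digits before the
#     decimal point.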
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
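# NB | For reference (an added illustration, not original fixture data): Python's
#     datetime.datetime cannot represent years beyond datetime.MAXYEAR == 9999, so
#     max_ts above is the largest value constructible here even though PostgreSQL
#     itself accepts timestamps up to 294276 AD (see the inline comments); exercising
#     the true server-side maximum would presumably require passing the value as a
#     string literal rather than a datetime object.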
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
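# NB | For illustration (restating the expectations asserted above, not new test logic):
#     with the two fixture records inserted in setUp, the sync is expected to emit
#
#         ['activate_version', 'upsert', 'upsert', 'activate_version']
#
#     which is where expected_record_count + 2 == 4 comes from, and why messages[1] and
#     messages[2] below are compared against the minimum- and maximum-value records.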
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
-            'our_cidr': 'cidr',
-            'our_inet': 'inet',
-            'our_mac': 'macaddr',
-            'our_alignment_enum': 'alignment',
-            'our_money': 'money',
-            'invalid_bigserial': 'bigint',
-            'invalid_bit_varying': 'bit varying',
-            'invalid_box': 'box',
-            'invalid_bytea': 'bytea',
-            'invalid_circle': 'circle',
-            'invalid_interval': 'interval',
-            'invalid_line': 'line',
-            'invalid_lseg': 'lseg',
-            'invalid_path': 'path',
-            'invalid_pg_lsn': 'pg_lsn',
-            'invalid_point': 'point',
-            'invalid_polygon': 'polygon',
-            'invalid_serial': 'integer',
-            'invalid_smallserial': 'smallint',
-            'invalid_tsquery': 'tsquery',
-            'invalid_tsvector': 'tsvector',
-            'invalid_txid_snapshot': 'txid_snapshot',
-            'invalid_xml': 'xml',
-        }
-
-    @staticmethod
-    def tap_name():
-        return "tap-postgres"
-
-    @staticmethod
-    def name():
-        return "tap_tester_postgres_discovery"
-
-    @staticmethod
-    def get_type():
-        return "platform.postgres"
-
-    @staticmethod
-    def get_credentials():
-        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
-
-    def get_properties(self, original_properties=True):
-        return_value = {
-            'host' : os.getenv('TAP_POSTGRES_HOST'),
-            'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
-            'port' : os.getenv('TAP_POSTGRES_PORT'),
-            'user' : os.getenv('TAP_POSTGRES_USER'),
-            'default_replication_method' : self.FULL_TABLE,
-            'filter_dbs' : 'discovery1'
-        }
-        if not original_properties:
-            if self.default_replication_method is self.LOG_BASED:
-                return_value['wal2json_message_format'] = '1'
-
-            return_value['default_replication_method'] = self.default_replication_method
-
-        return return_value
-
-    def test_run(self):
-        """Parametrized discovery test running against each replication method."""
-
-        self.default_replication_method = self.FULL_TABLE
-        full_table_conn_id = connections.ensure_connection(self, original_properties=False)
-        self.discovery_test(full_table_conn_id)
-
-        self.default_replication_method = self.INCREMENTAL
-        incremental_conn_id = connections.ensure_connection(self, original_properties=False)
-        self.discovery_test(incremental_conn_id)
-
-        # NB | We are able to generate a connection and run discovery with a default replication
-        #      method of logical replication WITHOUT selecting a replication slot. This is not
-        #      ideal behavior. This BUG should not be carried over into hp-postgres, but will not
-        #      be fixed for this tap.
-        self.default_replication_method = self.LOG_BASED
-        log_based_conn_id = connections.ensure_connection(self, original_properties=False)
-        self.discovery_test(log_based_conn_id)
-
-    def discovery_test(self, conn_id):
-        """
-        Basic Discovery Test for a database tap.
-
-        Test Description:
-          Ensure discovery runs without errors and generates a catalog of the expected form.
-
-        Test Cases:
-        - Verify discovery generated the expected catalogs by name.
-        - Verify that the table_name is in the format for each stream.
-        - Verify the catalog is found for a given stream.
-        - Verify there is only 1 top level breadcrumb in metadata for a given stream.
-        - Verify replication key(s) match expectations for a given stream.
-        - Verify primary key(s) match expectations for a given stream.
-        - Verify the replication method matches our expectations for a given stream.
-        - Verify that only primary keys are given the inclusion of automatic in metadata
-          for a given stream.
-        - Verify expected unsupported fields are given the inclusion of unsupported in
-          metadata for a given stream.
-        - Verify that all fields for a given stream which are not unsupported or automatic
-          have inclusion of available.
-        - Verify row-count metadata matches expectations for a given stream.
-        - Verify selected metadata is None for all streams.
-        - Verify is-view metadata is False for a given stream.
-        - Verify no forced-replication-method is present in metadata for a given stream.
-        - Verify schema and db match expectations for a given stream.
-        - Verify schema types match expectations for a given stream.
-        """
-        # TODO Generate multiple tables (streams) and maybe dbs too?
-
-        # run discovery (check mode)
-        check_job_name = runner.run_check_mode(self, conn_id)
-
-        # Verify check exit codes
-        exit_status = menagerie.get_exit_status(conn_id, check_job_name)
-        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
-
-        # Verify discovery generated a catalog
-        found_catalogs = menagerie.get_catalogs(conn_id)
-        self.assertGreater(len(found_catalogs), 0)
-
-        # Verify discovery generated the expected catalogs by name
-        found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs}
-        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
-        # Verify that the table_name is in the format for each stream
-        found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs}
-        self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids)
-
-        # Test by stream
-        for stream in self.expected_check_streams():
-            with self.subTest(stream=stream):
-
-                # Verify the catalog is found for a given stream
-                catalog = next(iter([catalog for catalog in found_catalogs
-                                     if catalog["stream_name"] == stream]))
-                self.assertTrue(isinstance(catalog, dict))
-
-                # collecting expected values
-                expected_primary_keys = self.expected_primary_keys()[stream]
-                expected_replication_keys = set()
-                expected_unsupported_fields = self.expected_unsupported_fields()
-                expected_fields_to_datatypes = self.expected_schema_types()
-                expected_row_count = len(self.recs)
-
-                # collecting actual values...
-                schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id'])
-                stream_metadata = schema_and_metadata["metadata"]
-                top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []]
-                stream_properties = top_level_metadata[0]['metadata']
-                actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, []))
-                actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, []))
-                actual_replication_method = stream_properties.get(self.REPLICATION_METHOD)
-                actual_automatic_fields = set(
-                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
-                    if item.get("metadata").get("inclusion") == "automatic"
-                )
-                actual_unsupported_fields = set(
-                    item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata
-                    if item.get("metadata").get("inclusion") == "unsupported"
-                )
-                actual_fields_to_datatypes = {
-                    item['breadcrumb'][1]: item['metadata'].get('sql-datatype')
-                    for item in stream_metadata[1:]
-                }
-
-                # Verify there is only 1 top level breadcrumb in metadata
-                self.assertEqual(1, len(top_level_metadata))
-
-                # Verify replication key(s) match expectations
-                self.assertSetEqual(
-                    expected_replication_keys, actual_replication_keys
-                )
-
-                # NB | We expect primary keys and replication keys to have inclusion automatic for
-                #      key-based incremental replication. But that is only true for primary keys here.
-                #      This BUG should not be carried over into hp-postgres, but will not be fixed for this tap.
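                # For orientation, the assertions here assume annotated-schema metadata entries of
                # roughly the following shape (an illustrative sketch only; names such as
                # 'table-key-properties' follow the usual Singer metadata conventions and are not
                # taken verbatim from this tap's output):
                #
                #   {"breadcrumb": [],
                #    "metadata": {"table-key-properties": ["id"], "row-count": 500,
                #                 "schema-name": "public", "database-name": "discovery1",
                #                 "is-view": False}}
                #   {"breadcrumb": ["properties", "id"],
                #    "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}}
                #   {"breadcrumb": ["properties", "our_varchar"],
                #    "metadata": {"inclusion": "available", "sql-datatype": "character varying"}}
                #   {"breadcrumb": ["properties", "invalid_xml"],
                #    "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}}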
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
        menagerie.verify_check_exit_status(self, exit_status, check_job_name)
-
-        # verify discovery produced (at least) 1 expected catalog
-        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
-                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
-        self.assertGreaterEqual(len(found_catalogs), 1)
-
-        # verify the tap discovered the expected streams
-        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
-        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
-        # verify that persisted streams have the correct properties
-        test_catalog = found_catalogs[0]
-        self.assertEqual(test_table_name, test_catalog['stream_name'])
-        print("discovered streams are correct")
-
-        # perform table selection
-        print('selecting {} and all fields within the table'.format(test_table_name))
-        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
-        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
-        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        # run sync job 1 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_1 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(3, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(5, len(messages))
-        self.assertEqual('activate_version', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('upsert', messages[3]['action'])
-        self.assertEqual('activate_version', messages[4]['action'])
-
-        # verify the persisted schema matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-        # verify replicated records match expectations
-        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
-        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
-        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
-
-        print("records are correct")
-
-        # grab bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
-
-        # verify state and bookmarks meet expectations
-        self.assertIsNone(state['currently_syncing'])
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertIsNone(bookmark.get('replication_key'))
-        self.assertIsNone(bookmark.get('replication_key_value'))
-        self.assertEqual(table_version_1, bookmark['version'])
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN and get the same 3 records
-        #----------------------------------------------------------------------
-
-        # run sync job 2 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_2 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(3, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(4, len(messages))
-        self.assertEqual('upsert', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('activate_version', messages[3]['action'])
-
-        # verify the new table version increased on the second sync
-        self.assertGreater(table_version_2, table_version_1)
-
-        # verify the persisted schema still matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-        # verify replicated records still match expectations
-        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
-        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
-        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
-
-        # grab bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
-
-        # verify state and bookmarks meet expectations
-        self.assertIsNone(state['currently_syncing'])
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertIsNone(bookmark.get('replication_key'))
-        self.assertIsNone(bookmark.get('replication_key_value'))
-        self.assertEqual(table_version_2, bookmark['version'])
-
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN following various manipulations to the data
-        #----------------------------------------------------------------------
-
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-
-                # NB | We will perform the following actions prior to the next sync:
-                #      [Action (EXPECTED RESULT)]
-
-                #      Insert a record
-                #      Insert a record to be updated prior to sync
-                #      Insert a record to be deleted prior to sync (NOT REPLICATED)
-
-                #      Update an existing record
-                #      Update a newly inserted record
-
-                #      Delete an existing record
-                #      Delete a newly inserted record
-
-                # inserting...
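                # NB | (illustrative sketch, assumed behavior) db_utils.insert_record is assumed
                #      to build its INSERT the same way as the module-level insert_record helper
                #      shown earlier in this patch for the arrays test: sorted dict keys become
                #      the column list and the values are passed as bind parameters, e.g. roughly:
                #
                #          def insert_record(cursor, table_name, data):
                #              keys = sorted(data.keys())
                #              sql = "INSERT INTO {} ({}) VALUES ({})".format(
                #                  quote_ident(table_name, cursor),
                #                  ", ".join(keys),
                #                  ", ".join(["%s"] * len(keys)))
                #              cursor.execute(sql, [data[k] for k in keys])
                #
                #      which is why the quoted column names ("OUR TS", "OUR TIME TZ", ...) are
                #      keyed via quote_ident in the records below.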
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
-                # an existing record
-                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
-                record_pk = 1
-                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
-                our_ts_tz = nyc_tz.localize(our_ts)
-                updated_data = {
-                    "OUR TS TZ": our_ts_tz,
-                    "our_double": decimal.Decimal("6.6"),
-                    "our_money": "$0.00"
-                }
-                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
-                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
-                self.expected_records[0]["our_money"] = "$0.00"
-
-                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
-                # a newly inserted record
-                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
-                record_pk = 5
-                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
-                our_ts_tz = nyc_tz.localize(our_ts)
-                updated_data = {
-                    "OUR TS TZ": our_ts_tz,
-                    "our_double": decimal.Decimal("6.6"),
-                    "our_money": "$0.00"
-                }
-                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
-                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
-                self.expected_records[4]["our_money"] = "$0.00"
-
-                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
-
-                # deleting
-                # an existing record
-                record_pk = 2
-                db_utils.delete_record(cur, canon_table_name, record_pk)
-
-                # a newly inserted record
-                record_pk = 6
-                db_utils.delete_record(cur, canon_table_name, record_pk)
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN after various manipulations
-        #----------------------------------------------------------------------
-
-        # run sync job 3 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_3 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(4, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(5, len(messages))
-        self.assertEqual('upsert', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('upsert', messages[3]['action'])
-        self.assertEqual('activate_version', messages[4]['action'])
-
-        # verify the new table version increased on the third sync
-        self.assertGreater(table_version_3, table_version_2)
-
-        # verify the persisted schema still matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-
-        # NB | This is a little tough to track mentally so here's a breakdown of
-        #      the order of operations by expected records indexes:
-
-        # Prior to Sync 1
-        #     insert 0, 1, 2
-
-        # Prior to Sync 2
-        #     No db changes
-
-        # Prior to Sync 3
-        #     insert 3, 4, 5
-        #     update 0, 4
-        #     delete 1, 5
-
-        # Resulting Synced Records: 2, 3, 0, 4
-
-
-        # verify replicated records still match expectations
-        self.assertDictEqual(self.expected_records[2], messages[0]['data'])   # existing insert
-        self.assertDictEqual(self.expected_records[3], messages[1]['data'])   # new insert
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
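The fixture dicts above use psycopg2's quote_ident(...) as keys because several column names ('OUR TS', 'OUR TIME TZ', ...) contain spaces and upper-case letters, which Postgres only preserves when the identifier is double-quoted. A hedged, standalone sketch (the "dbname=dev" DSN is an assumption; quote_ident just needs any live connection):

    import psycopg2
    from psycopg2.extensions import quote_ident

    # assumed DSN; quote_ident only needs a live connection/cursor to escape with
    with psycopg2.connect("dbname=dev") as conn, conn.cursor() as cur:
        print(quote_ident("OUR TS TZ", cur))   # "OUR TS TZ" -- double-quoted, case and space preserved
        # unquoted, Postgres would fold the name to lower case and the space
        # would be a syntax error, so the generated INSERT must quote it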
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
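The expected_ts/expected_ts_tz helpers above exist so the expected values stay correct across DST boundaries: a naive timestamp is simply labelled "+00:00", while an aware one is first converted to UTC. A worked example with record 1's values (pytz assumed, as in the test):

    import datetime
    import pytz

    nyc_tz = pytz.timezone('America/New_York')
    our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184)
    our_ts_tz = nyc_tz.localize(our_ts)

    naive = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00")
    aware = datetime.datetime.strftime(our_ts_tz.astimezone(pytz.utc), "%Y-%m-%dT%H:%M:%S.%f+00:00")

    assert naive == '1977-03-03T03:03:03.733184+00:00'   # naive value labelled UTC as-is
    assert aware == '1977-03-03T08:03:03.733184+00:00'   # EST value shifted to UTC first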
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
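Conceptually (this is a sketch, not the tap's literal implementation), selecting the stream as INCREMENTAL on "OUR TS TZ" reduces each sync to a bookmarked, ordered query; the inclusive comparison is why the previously bookmarked record shows up again on the next sync.

    # bookmark = replication_key_value persisted after sync 1 (record id 3)
    bookmark = '1997-02-02T07:02:02.722184+00:00'

    incremental_sql = '''
        SELECT *
          FROM "public"."postgres_incremental_replication_test"
         WHERE "OUR TS TZ" >= %s
         ORDER BY "OUR TS TZ" ASC
    '''
    # executed with (bookmark,); the >= rather than > is what re-selects the
    # bookmarked row on subsequent syncs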
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
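Tying this back to the NB action list before the manipulations: only rows whose replication-key value is at or above the sync-1 bookmark are emitted. A standard-library-only sketch (UTC strings worked out from the fixture timestamps above; ids are the table's primary keys):

    bookmark = '1997-02-02T07:02:02.722184+00:00'        # after sync 1 (id 3)

    candidates = {
        3: '1997-02-02T07:02:02.722184+00:00',  # previously bookmarked row
        1: '2021-04-04T08:04:04.733184+00:00',  # updated to above the bookmark
        2: '1990-04-04T08:04:04.733184+00:00',  # updated to below the bookmark
        4: '1996-04-04T09:04:04.733184+00:00',  # inserted below the bookmark
        6: '2111-01-01T17:12:12.222111+00:00',  # inserted above the bookmark
        # id 5 was inserted above the bookmark but deleted again before the sync
    }
    replicated = sorted((ts, pk) for pk, ts in candidates.items() if ts >= bookmark)
    assert [pk for _, pk in replicated] == [3, 1, 6]      # the 3 upserts, in replication-key order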
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
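The single record that comes back in sync 3 is explained by the same inclusive comparison: the row whose replication key equals the bookmark is always re-selected, and the deleted row (id 1) is simply gone from the table. A small sketch with the remaining rows (UTC values derived from the fixtures above):

    bookmark = '2111-01-01T17:12:12.222111+00:00'   # replication_key_value after sync 2 (id 6)

    remaining_rows = {                               # ids still present before sync 3
        2: '1990-04-04T08:04:04.733184+00:00',
        3: '1997-02-02T07:02:02.722184+00:00',
        4: '1996-04-04T09:04:04.733184+00:00',
        6: '2111-01-01T17:12:12.222111+00:00',
    }
    replicated = [pk for pk, ts in remaining_rows.items() if ts >= bookmark]
    assert replicated == [6]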
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
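For readers new to the sdc_recursive_*_array definitions in the schema above: each one describes a value that may be null, a scalar of the named type, or an arbitrarily nested array of the same definition. Illustrative values only:

    # all of these would satisfy sdc_recursive_integer_array
    examples = [None, 7, [1, 2, 3], [[1, 2], [3, None]], [[[42]]]]
    # a string anywhere in the nesting would not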
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
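The insert_record helper above special-cases the JSON/JSONB array columns with an explicit ::json[] / ::jsonb[] cast so Postgres knows the element type of the parameterised array. A hedged sketch of the SQL shape it generates (table name hard-coded here; the real helper quotes it via quote_ident):

    columns = ['our_int8_array', 'our_json_array']
    placeholders = ['%s', '%s::json[]']
    insert_sql = 'INSERT INTO "postgres_logical_replication_array_test"\n' \
                 '  ( {} )\n  VALUES ( {} )'.format(', '.join(columns), ', '.join(placeholders))
    print(insert_sql)
    # INSERT INTO "postgres_logical_replication_array_test"
    #   ( our_int8_array, our_json_array )
    #   VALUES ( %s, %s::json[] )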
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
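The slot handling in setUp above goes through db_utils.get_test_connection('dev', True), which is presumed to return a logical replication connection. For reference, a hedged equivalent using psycopg2 directly (DSN assumed; dropping a slot that does not exist raises a ProgrammingError, which this sketch swallows instead of pre-checking pg_replication_slots):

    import psycopg2
    import psycopg2.extras

    conn = psycopg2.connect("dbname=dev",
                            connection_factory=psycopg2.extras.LogicalReplicationConnection)
    with conn.cursor() as cur:
        try:
            cur.drop_replication_slot('stitch')
        except psycopg2.ProgrammingError:
            pass                                   # slot did not exist yet
        cur.create_replication_slot('stitch', output_plugin='wal2json')
    conn.close()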
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
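The assertions above pin down the state shape for a LOG_BASED stream after its initial (empty) historical sync: currently_syncing is cleared, and the bookmark carries an lsn and the activated table version. A sketch with placeholder values:

    example_state = {
        'currently_syncing': None,
        'bookmarks': {
            'dev-public-postgres_logical_replication_array_test': {
                'lsn': 123456789,          # placeholder WAL position
                'version': 1612345678901,  # placeholder table version
            },
        },
    }
    bookmark = example_state['bookmarks']['dev-public-postgres_logical_replication_array_test']
    assert bookmark['lsn'] is not None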
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
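The comparison above boils down to the mapping between the Postgres array literals used in the fixture and the nested Python values the target receives. A standard-library-only illustration using a few of the columns checked above:

    import decimal

    inserted = {
        'our_int_array':     '{{1,2,3},{4,5,6}}',
        'our_int8_array':    '{16,32,64}',
        'our_decimal_array': '{{1234567890.01}}',
        'our_bit_array':     '{{0,1,1}}',
    }
    expected = {
        'our_int_array':     [[1, 2, 3], [4, 5, 6]],      # 2-D literal -> list of lists
        'our_int8_array':    [16, 32, 64],
        'our_decimal_array': [decimal.Decimal('1234567890.01')],
        'our_bit_array':     [[False, True, True]],       # BIT(1) elements decode to booleans
    }
    assert expected['our_int_array'][1][2] == 6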
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
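Because this test replicates same-named schemas out of two databases, get_properties above turns on include_schemas_in_destination_stream_name: the tap_stream_ids stay database-qualified ('dev-public-...', 'postgres-public-...') while the destination stream names are prefixed with the schema. A hypothetical helper, just to illustrate the naming and not the tap's code:

    def destination_stream_name(schema, table, include_schema=True):
        # illustrative only
        return '{}_{}'.format(schema, table) if include_schema else table

    assert destination_stream_name('public', 'postgres_logical_replication_test_cows') \
        == 'public_postgres_logical_replication_test_cows'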
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
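# A minimal sketch of how the hard-coded UTC expectations above -- e.g.
# 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00' -- could be derived from the localized
# fixture datetimes instead of being written by hand. sketch_expected_ts and
# sketch_expected_ts_tz are hypothetical helpers that mirror the expected_ts/expected_ts_tz
# methods used by other test classes in this patch series, assuming the datetime and pytz
# imports already present at the top of this module:

def sketch_expected_ts(our_ts):
    # naive TIMESTAMP WITHOUT TIME ZONE values are formatted as-is with a +00:00 suffix
    return datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00")

def sketch_expected_ts_tz(our_ts_tz):
    # TIMESTAMP WITH TIME ZONE values are normalized to UTC before formatting
    our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc)
    return datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00")

# e.g. sketch_expected_ts_tz(nyc_tz.localize(datetime.datetime(1993, 3, 3, 3, 3, 3, 333333)))
# yields '1993-03-03T08:03:03.333333+00:00', matching expected_inserted_record above.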
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
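# A minimal sketch of the breadcrumb-to-dict conversion the discovered-metadata assertion
# above relies on. metadata.to_map is the real singer-python helper; to_map_sketch below is
# only an illustration (an assumption about its output shape), shown so the tuple-keyed
# literal being compared against is easier to read:

def to_map_sketch(raw_metadata):
    # each entry carries a 'breadcrumb' (path into the schema) and a 'metadata' dict;
    # keying by the breadcrumb tuple allows comparisons against literals such as
    # {(): {...}, ('properties', 'id'): {...}}
    return {tuple(entry['breadcrumb']): entry['metadata'] for entry in raw_metadata}

# e.g. to_map_sketch([{'breadcrumb': [], 'metadata': {'is-view': True}}])
# == {(): {'is-view': True}}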
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
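The per-field subTest pattern referenced in the comment above keeps every mismatched column visible in the test output instead of aborting on the first failed assertion, which matters when a record contains multi-megabyte CHAR values. A minimal, self-contained sketch of that pattern with small illustrative dicts (the real loops below compare self.expected_records against the synced messages):

import unittest

class SubTestPatternExample(unittest.TestCase):
    """Illustrative only: compare two records field by field under subTest."""

    def test_field_by_field(self):
        expected = {'id': 1, 'our_smallint': -32768, 'our_char': 'a'}
        actual = {'id': 1, 'our_smallint': -32768, 'our_char': 'a'}

        for field, expected_value in expected.items():
            # Each field gets its own subTest, so a failure in one column is
            # reported individually and the remaining columns still execute.
            with self.subTest(field=field):
                self.assertEqual(expected_value,
                                 actual.get(field, "MISSING FIELD"))

if __name__ == '__main__':
    unittest.main()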
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
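The assertions around this point bucket discovered fields by their 'inclusion' metadata (automatic, unsupported, available). A minimal sketch of that bucketing against a hand-written metadata list, assuming the same breadcrumb layout the test reads from menagerie.get_annotated_schema (the field names below are illustrative only):

# Stand-in for the 'metadata' array of an annotated schema.
stream_metadata = [
    {"breadcrumb": [], "metadata": {"table-key-properties": ["id"], "row-count": 500}},
    {"breadcrumb": ["properties", "id"],
     "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
    {"breadcrumb": ["properties", "our_varchar"],
     "metadata": {"inclusion": "available", "sql-datatype": "character varying"}},
    {"breadcrumb": ["properties", "invalid_xml"],
     "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}},
]

# Same comprehension style as the test: take the field name from the
# breadcrumb and bucket it by its inclusion value.
automatic = {item["breadcrumb"][1] for item in stream_metadata
             if item["breadcrumb"] and item["metadata"].get("inclusion") == "automatic"}
unsupported = {item["breadcrumb"][1] for item in stream_metadata
               if item["breadcrumb"] and item["metadata"].get("inclusion") == "unsupported"}
available = {item["breadcrumb"][1] for item in stream_metadata
             if item["breadcrumb"] and item["metadata"].get("inclusion") == "available"}

assert automatic == {"id"}            # only the primary key, per the NB comment above
assert unsupported == {"invalid_xml"}
assert available == {"our_varchar"}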
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
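# NB | The money expectations below assume the default lc_monetary locale, under
#      which Postgres rounds MONEY values to two decimal places on input, e.g.:
#        SELECT '$0.98789'::money;  -- $0.99
#        SELECT '100.1122'::money;  -- $100.11
#      which is why a record inserted with '$0.98789' is expected back as '$0.99'.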
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
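# NB | Each update below also mutates the matching self.expected_records entry so
#      the post-sync assertions compare against the updated values. The
#      db_utils.update_record helper (defined in db_utils.py, not shown in this
#      patch) is assumed to issue a parameterized UPDATE along the lines of:
#        UPDATE "public"."postgres_full_table_replication_test"
#           SET "OUR TS TZ" = %s, our_double = %s, our_money = %s
#         WHERE id = %s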
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
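# NB | Based on the assertions that follow, a discovered catalog entry for this
#      stream is assumed to look roughly like:
#        {'tap_stream_id': 'dev-public-postgres_incremental_replication_test',
#         'stream_name': 'postgres_incremental_replication_test',
#         'stream_id': ...}   # opaque id, used to fetch the annotated schema
#      with replication-method / replication-key supplied separately as metadata.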
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
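# NB | The checks below assume incremental replication issues an inclusive
#      lower-bound query on the replication key, roughly:
#        SELECT * FROM "public"."postgres_incremental_replication_test"
#         WHERE "OUR TS TZ" >= <bookmarked replication_key_value>
#         ORDER BY "OUR TS TZ" ASC
#      so the previously bookmarked record (id 3) is re-emitted, rows whose key
#      now sits above the bookmark (the updated id 1 and the new id 6) are synced,
#      and id 4, the back-dated id 2, and the hard-deleted id 5 are not.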
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
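# drop_replication_slot / create_replication_slot are psycopg2 ReplicationCursor methods, so
# the True passed to db_utils.get_test_connection above presumably requests a logical
# replication connection (db_utils itself is not shown in this patch). Dropping any leftover
# 'stitch_postgres' slot and recreating it with the wal2json output plugin gives each test run
# a clean starting LSN for the tap's LOG_BASED sync.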
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
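
Each fixture insert below repeats the pattern of the two above: insert the row, then graft the serial id the database will assign and a null _sdc_deleted_at onto the same dict so it can double as the expected upsert record. A small, hypothetical helper (not part of the test suite) that restates that pattern:

    def as_expected_upsert(inserted_columns, assigned_id):
        # The record a LOG_BASED sync should emit for a fresh insert: the inserted
        # columns, the serial primary key value the database assigned, and
        # _sdc_deleted_at, which stays None until the row is deleted.
        expected = dict(inserted_columns)
        expected['id'] = assigned_id
        expected['_sdc_deleted_at'] = None
        return expected

    print(as_expected_upsert({'cow_name': 'cindy cow', 'cow_age': 10}, 3))
    # {'cow_name': 'cindy cow', 'cow_age': 10, 'id': 3, '_sdc_deleted_at': None}
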
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
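For context on the deletes verified above: the test only observes them through the tap's output, but the same changes could be peeked directly from the 'stitch' wal2json slot. The helper below is an illustrative sketch only (it is not part of this patch; the helper name and its use of db_utils are assumptions), and it shows why a single `DELETE ... WHERE id IN (4, 5)` surfaces as one decoded change per affected row, each of which the tap turns into its own record with _sdc_deleted_at set.

import json

import db_utils  # pylint: disable=import-error


def peek_slot_changes(slot_name='stitch'):
    # Peek at (without consuming) whatever is currently queued in the slot;
    # wal2json's default format-version 1 output is one JSON document per
    # transaction, whose "change" array holds one entry per affected row.
    with db_utils.get_test_connection('dev') as conn:
        conn.autocommit = True
        with conn.cursor() as cur:
            cur.execute("SELECT lsn, data FROM pg_logical_slot_peek_changes(%s, NULL, NULL)",
                        [slot_name])
            return [(lsn, json.loads(data)) for lsn, data in cur.fetchall()]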
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
-
-        # verify that persisted streams have the correct properties
-        chicken_catalog = found_catalogs[0]
-
-        self.assertEqual('chicken_view', chicken_catalog['stream_name'])
-        print("discovered streams are correct")
-
-        print('checking discovered metadata for ROOT-CHICKEN_VIEW')
-        md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata']
-
-        self.assertEqual(
-            {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []},
-             ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True},
-             ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
-             ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True},
-             ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
-             ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}},
-            metadata.to_map(md))
-
-
-        # 'ID' selected as view-key-properties
-        replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}]
-
-        connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog,
-                                                           menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']),
-                                                           replication_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-
-        # verify tap and target exit codes
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        record_count_by_stream = runner.examine_target_output_file(self,
-                                                                   conn_id,
-                                                                   self.expected_sync_streams(),
-                                                                   self.expected_pks())
-
-
-        self.assertEqual(record_count_by_stream, { 'chicken_view': 1})
-        records_by_stream = runner.get_records_from_target_output()
-
-        table_version = records_by_stream['chicken_view']['table_version']
-        self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version')
-        self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert')
-        self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version')
-
-        # verifications about individual records
-        for stream, recs in records_by_stream.items():
-            # verify the persisted schema was correct
-            self.assertEqual(recs['schema'],
-                             expected_schemas[stream],
-                             msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))
-
-        actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data']
-
-        expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'}
-        self.assertEqual(actual_chicken_record,
-                         expected_chicken_record,
-                         msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record))
-
-        print("records are correct")
-
-        # verify state and bookmarks
-        state = menagerie.get_state(conn_id)
-
-        chicken_bookmark = state['bookmarks']['postgres-public-chicken_view']
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-        self.assertEqual(chicken_bookmark['version'], table_version,
-                         msg="expected bookmark for stream ROOT-CHICKEN to match version")
-
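Because chicken_view joins two tables, discovery reports no table-key-properties, which is why the test supplies 'id' through view-key-properties in the replication metadata. A quick illustrative query against the view (a sketch, not part of this patch) shows the single joined row the full-table sync is expected to emit.

import psycopg2.extras

import db_utils  # pylint: disable=import-error

with db_utils.get_test_connection() as conn:
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        cur.execute('SELECT * FROM "chicken_view"')
        # expected joined row: id=1, name='fred', size='big', fk_id=1, age=99
        print(cur.fetchone())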
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
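(A self-contained illustration of the per-field subTest pattern referred to in the note above; the class, field names, and values here are purely illustrative:)

    import unittest

    class DictComparisonExample(unittest.TestCase):
        def test_field_by_field(self):
            # Huge string values make an assertDictEqual diff unreadable,
            # so compare one field at a time and report each failure separately.
            expected = {'id': 1, 'our_text': 'x' * 10000}
            actual = {'id': 1, 'our_text': 'x' * 10000}
            for key, value in expected.items():
                with self.subTest(field=key):
                    self.assertEqual(value, actual.get(key, "MISSING FIELD"))
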
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
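# --- Editorial sketch (added for clarity; not part of the original patch) ---
# The breadcrumb filtering above assumes discovery metadata shaped roughly like
# the list below: the empty breadcrumb carries stream-level properties such as
# 'row-count', 'is-view', 'schema-name', and 'database-name', while per-field
# entries carry 'inclusion' and 'sql-datatype'. The concrete values shown are
# illustrative assumptions, not output captured from the tap.
#
#     stream_metadata = [
#         {"breadcrumb": [], "metadata": {"row-count": 500, "is-view": False,
#                                         "schema-name": "public", "database-name": "discovery1"}},
#         {"breadcrumb": ["properties", "id"], "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
#         {"breadcrumb": ["properties", "our_text"], "metadata": {"inclusion": "available", "sql-datatype": "text"}},
#         {"breadcrumb": ["properties", "invalid_xml"], "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}},
#     ]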
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
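# --- Editorial sketch (added for clarity; not part of the original patch) ---
# Illustrates how the expected_ts_tz()/expected_ts() helpers defined above
# normalize fixture timestamps into the strings asserted against replicated
# records: a localized value is shifted to UTC and formatted with a literal
# "+00:00" offset. Uses the record-1 fixture values from setUp; February falls
# in EST (UTC-5), so 02:02 local becomes 07:02 UTC.
#
#     >>> import datetime, pytz
#     >>> nyc_tz = pytz.timezone('America/New_York')
#     >>> our_ts_tz = nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))
#     >>> our_ts_tz.astimezone(pytz.utc).strftime("%Y-%m-%dT%H:%M:%S.%f+00:00")
#     '1997-02-02T07:02:02.722184+00:00'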
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
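The discovery assertions that follow key off tap_stream_id values of the form <database>-<schema>-<table>, which is the convention used throughout these tests ('dev-public-postgres_incremental_replication_test' here, 'postgres-public-...' for streams coming from the postgres database). A minimal sketch to make that convention explicit; expected_tap_stream_id is a hypothetical helper name, not part of the tap or the test harness:

    # Hypothetical helper illustrating the tap_stream_id naming convention that
    # expected_check_streams() relies on: <database>-<schema>-<table>.
    def expected_tap_stream_id(dbname, schema, table):
        return '{}-{}-{}'.format(dbname, schema, table)

    assert expected_tap_stream_id('dev', 'public', 'postgres_incremental_replication_test') == \
        'dev-public-postgres_incremental_replication_test'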
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
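The insert, delete, and update checks in this test all repeat the same two-step comparison: assert the key sets match, then assert each value individually so a failure names the offending column. A minimal sketch of that pattern as a standalone helper (hypothetical, not part of this patch) could look like:

def assert_record_matches(test, actual, expected):
    # compare key sets first so a missing or extra column fails loudly
    test.assertEqual(set(actual.keys()), set(expected.keys()),
                     msg="record keys differ: {}".format(
                         set(actual.keys()).symmetric_difference(set(expected.keys()))))
    # then compare values one key at a time so the failure message names the column
    for key, value in actual.items():
        test.assertEqual(value, expected[key],
                         msg="{} != {} for key {}".format(value, expected[key], key))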
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
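For reference, the delete assertions above rely on logical replication surfacing a DELETE as an 'upsert' message whose data still carries the primary key but with a populated _sdc_deleted_at timestamp. A small illustrative predicate (hypothetical, mirroring those assertions) would be:

def is_soft_delete(message):
    # a logical-replication delete still arrives as an upsert, but with the
    # _sdc_deleted_at column filled in rather than None
    return (message['action'] == 'upsert'
            and message['data'].get('_sdc_deleted_at') is not None)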
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
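The message-order assertions above encode the full-table contract for views: a sync opens with activate_version, emits one upsert per row, closes with another activate_version, and the stream's bookmark version matches the emitted table_version. An illustrative helper (hypothetical, not part of this patch) capturing that shape:

def assert_full_table_shape(test, messages, bookmark, table_version):
    actions = [m['action'] for m in messages]
    # the sync is bracketed by activate_version messages
    test.assertEqual('activate_version', actions[0])
    test.assertEqual('activate_version', actions[-1])
    # everything in between is row data
    test.assertTrue(all(action == 'upsert' for action in actions[1:-1]))
    # the bookmark pins the same table version the target saw
    test.assertEqual(bookmark['version'], table_version)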
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
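# Illustrative aside (not part of the patch): the NB comment above is why the
# verification below goes value by value. A minimal, hedged sketch of the subTest
# pattern it refers to -- plain unittest, no tap_tester dependencies, and the
# method name is hypothetical:
import unittest

class ExampleFieldByFieldComparison(unittest.TestCase):
    def assert_record_matches(self, expected_record, actual_record):
        # one subTest per field keeps failure output readable even when a single
        # value is a multi-megabyte string (e.g. the CHAR(10485760) column)
        for field, expected_value in expected_record.items():
            with self.subTest(field=field):
                self.assertEqual(expected_value,
                                 actual_record.get(field, "MISSING FIELD"))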
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
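# Illustrative aside (not part of the patch): the annotated schema walked above is a
# list of breadcrumb/metadata entries -- breadcrumb == [] for the stream-level entry
# and ['properties', <field>] for each column. A toy sample (values chosen to mirror
# this test's fixtures, not taken from a real catalog) and the same set comprehension
# used above:
sample_metadata = [
    {'breadcrumb': [], 'metadata': {'table-key-properties': ['id'], 'row-count': 500,
                                    'schema-name': 'public', 'is-view': False}},
    {'breadcrumb': ['properties', 'id'],
     'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer'}},
    {'breadcrumb': ['properties', 'our_text'],
     'metadata': {'inclusion': 'available', 'sql-datatype': 'text'}},
    {'breadcrumb': ['properties', 'invalid_xml'],
     'metadata': {'inclusion': 'unsupported', 'sql-datatype': 'xml'}},
]
automatic_fields = {item['breadcrumb'][1] for item in sample_metadata
                    if item['breadcrumb']
                    and item['metadata'].get('inclusion') == 'automatic'}
assert automatic_fields == {'id'}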
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
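# Illustrative aside (not part of the patch): the tap_stream_id values filtered on
# above follow the <database>-<schema>-<table> convention used by the discovery
# tests earlier in this series. The helper name below is hypothetical and only
# demonstrates the format:
def make_tap_stream_id(db, schema, table):
    # mirrors the "{}-{}-{}".format(test_db, test_schema_name, stream) pattern
    # used in expected_check_stream_ids() above
    return "{}-{}-{}".format(db, schema, table)

assert (make_tap_stream_id('dev', 'public', 'postgres_full_table_replication_array_test')
        == 'dev-public-postgres_full_table_replication_array_test')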
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
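# Illustrative aside (not part of the patch): in the expected record constructed just
# below, JSON and JSONB array elements are compared as serialized strings rather than
# dicts, which is why the expectations use json.dumps(...). A minimal check of that
# string form:
import json
assert json.dumps({'secret': 55}) == '{"secret": 55}'
assert [json.dumps({'secret': 55})] == ['{"secret": 55}']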
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
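# [Editor's note] Illustrative aside, not part of the patch: the 'our_store'
# fixtures in these tests insert an hstore literal such as
# 'dances=>"floor",name=>"betty"' and expect the tap to emit a plain dict.
# A naive parse that covers only the simple literals used in these fixtures
# (helper name and approach are hypothetical):
def parse_simple_hstore(literal):
    pairs = {}
    for item in literal.split(','):
        key, value = item.split('=>')
        pairs[key.strip()] = value.strip().strip('"')
    return pairs

assert parse_simple_hstore('dances=>"floor",name=>"betty"') == {'name': 'betty', 'dances': 'floor'}
assert parse_simple_hstore('size=>"small",name=>"betty"') == {'name': 'betty', 'size': 'small'}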
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
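# [Editor's note] Illustrative aside, not part of the patch: the expected
# "our_money" strings in these fixtures ('$0.98789' -> '$0.99',
# '100.1122' -> '$100.11', '$1,445.5678' -> '$1,445.57') assume the Postgres
# money column rounds the inserted literal to cents and renders it with the
# default '$'/thousands-separator formatting. A rough Python equivalent
# (function name and rounding mode are assumptions):
from decimal import Decimal, ROUND_HALF_UP

def expected_money(literal):
    value = Decimal(literal.replace('$', '').replace(',', ''))
    return '${:,.2f}'.format(value.quantize(Decimal('0.01'), rounding=ROUND_HALF_UP))

assert expected_money('$0.98789') == '$0.99'
assert expected_money('100.1122') == '$100.11'
assert expected_money('$1,445.5678') == '$1,445.57'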
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
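# [Editor's note] Illustrative aside, not part of the patch: how the
# expected_ts()/expected_ts_tz() helpers above turn the fixture datetimes into
# the strings the target is expected to emit. Values are taken from record 2
# of this file's fixtures.
import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184)
our_ts_tz = nyc_tz.localize(our_ts)

# TIMESTAMP WITH TIME ZONE values are normalized to UTC (EST is UTC-5 here)...
utc_ts = our_ts_tz.astimezone(pytz.utc)
assert utc_ts.strftime("%Y-%m-%dT%H:%M:%S.%f+00:00") == '1987-02-02T07:02:02.722184+00:00'
# ...while naive TIMESTAMP values keep their wall-clock time, just reformatted.
assert our_ts.strftime("%Y-%m-%dT%H:%M:%S.%f+00:00") == '1987-02-02T02:02:02.722184+00:00'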
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
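# [Editor's note] Illustrative aside, not part of the patch: the bookmark just
# verified drives the next sync. Incremental replication re-selects rows whose
# replication-key value is greater than or equal to the bookmarked value (the
# comparison is inclusive, which is why the bookmarked row itself is emitted
# again below). A minimal sketch with hypothetical rows loosely mirroring this
# test's fixtures:
bookmark_value = '1997-02-02T07:02:02.722184+00:00'
rows = [
    {'id': 3, 'OUR TS TZ': '1997-02-02T07:02:02.722184+00:00'},  # bookmarked row
    {'id': 4, 'OUR TS TZ': '1996-04-04T09:04:04.733184+00:00'},  # below the bookmark
    {'id': 6, 'OUR TS TZ': '2111-01-01T17:12:12.222111+00:00'},  # above the bookmark
]
selected = [row['id'] for row in rows if row['OUR TS TZ'] >= bookmark_value]
assert selected == [3, 6]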
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
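# [Editor's note] Illustrative aside, not part of the patch: a compact
# restatement of the assertions that follow. After the manipulations above,
# sync 2 should upsert exactly three rows, in ascending replication-key order:
# the previously bookmarked row (id 3), the pre-existing row whose "OUR TS TZ"
# was bumped above the bookmark (id 1), and the new row with the highest key
# (id 6). Rows 2 and 4 sit below the bookmark and row 5 was deleted before the
# sync, so none of them appear.
expected_sync2_upsert_ids = [3, 1, 6]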
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
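# [Editor's note] Illustrative aside, not part of the patch: key-based
# INCREMENTAL replication only re-selects rows at or above the bookmark, and a
# hard DELETE leaves nothing to select, so sync 3 should emit just the
# previously bookmarked row (id 6); the deleted row (id 1) simply disappears
# without any delete event. (Surfacing deletes, e.g. via an _sdc_deleted_at
# column, is a LOG_BASED concern and is not exercised here.)
expected_sync3_upsert_ids = [6]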
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
-        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
-        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
-
-        # verify state and bookmarks
-        state = menagerie.get_state(conn_id)
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-
-        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
-        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
-        lsn_cows_1 = bookmark_cows['lsn']
-        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
-
-        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
-        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
-        lsn_chickens_1 = bookmark_chickens['lsn']
-        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
-
-
-        #----------------------------------------------------------------------
-        # invoke the sync job again after adding records
-        #----------------------------------------------------------------------
-        print("inserting 2 more cows and 2 more chickens")
-
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor() as cur:
-                # insert another cow
-                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
-                insert_record(cur, test_table_name_cows, self.cows_rec_2)
-                # update that cow's expected values
-                self.cows_rec_2['id'] = 2
-                self.cows_rec_2['_sdc_deleted_at'] = None
-
-                # insert another chicken
-                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
-                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
-                # update that chicken's expected values
-                self.chicken_rec_2['id'] = 2
-                self.chicken_rec_2['_sdc_deleted_at'] = None
-
-                # and repeat...
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discovered metadata for chicken_view') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `chicken_view` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream chicken_view to match version") - -
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point
-                when precision is explicitly stated, maximum is 1000 digits
-      TODOs
-        - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision).
-        - Cover Maximum precision and scale
-        - Cover Minimum precision and scale
-        - Cover NaN
-
-
-    Floating-Point Types
-      - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic
-      - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits
-      - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits
-      - numbers too close to zero that are not representable as distinct from zero will cause an underflow error.
-      TODOs
-        - Cover NaN, -Inf, Inf
-
-
-    Character
-
-      TODOs
-        - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n)
-        - VARCHAR(10485760)
-        - Generate a 1 GB string??
-
-    Binary Types
-      Bytea | binary string, sequence of octets can be written in hex or escape
-      TODOs
-        - Generate different fields for hex and escape
-
-    Network Address Types
-      TODOs
-        - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y'
-        - For inet/cidr 'y' will default to 32 for ipv4 and 128 for ipv6
-        - For mac do all the input formats
-            [] '08:00:2b:01:02:03'
-            [] '08-00-2b-01-02-03'
-            [] '08002b:010203'
-            [] '08002b-010203'
-            [] '0800.2b01.0203'
-            [] '08002b010203'
-
-    Datetimes
-      TODOs
-        - Test values with second, millisecond and microsecond precision
-
-    Boolean
-      TODOs
-        - Enter all acceptable inputs for True:
-            TRUE
-            't'
-            'true'
-            'y'
-            'yes'
-            '1'
-        - Enter all acceptable inputs for False:
-            FALSE
-            'f'
-            'false'
-            'n'
-            'no'
-            '0'
-    """
-
-    AUTOMATIC_FIELDS = "automatic"
-    REPLICATION_KEYS = "valid-replication-keys"
-    PRIMARY_KEYS = "table-key-properties"
-    FOREIGN_KEYS = "table-foreign-key-properties"
-    REPLICATION_METHOD = "forced-replication-method"
-    API_LIMIT = "max-row-limit"
-    INCREMENTAL = "INCREMENTAL"
-    FULL_TABLE = "FULL_TABLE"
-    LOG_BASED = "LOG_BASED"
-
-    UNSUPPORTED_TYPES = {
-        "BIGSERIAL",
-        "BIT VARYING",
-        "BOX",
-        "BYTEA",
-        "CIRCLE",
-        "INTERVAL",
-        "LINE",
-        "LSEG",
-        "PATH",
-        "PG_LSN",
-        "POINT",
-        "POLYGON",
-        "SERIAL",
-        "SMALLSERIAL",
-        "TSQUERY",
-        "TSVECTOR",
-        "TXID_SNAPSHOT",
-        "XML",
-    }
-    default_replication_method = ""
-
-    def tearDown(self):
-        pass
-        # with db_utils.get_test_connection(test_db) as conn:
-        #     conn.autocommit = True
-        #     with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-        #         cur.execute(""" SELECT pg_drop_replication_slot('stitch') """)
-
-    def setUp(self):
-        db_utils.ensure_environment_variables_set()
-
-        db_utils.ensure_db(test_db)
-        self.maxDiff = None
-
-        with db_utils.get_test_connection(test_db) as conn:
-            conn.autocommit = True
-            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-
-                # db_utils.ensure_replication_slot(cur, test_db)
-
-                canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
-
-                create_table_sql = """
-CREATE TABLE {} (id SERIAL PRIMARY KEY,
-                our_varchar VARCHAR,
-                our_varchar_big VARCHAR(10485760),
-                our_char CHAR,
-                our_char_big CHAR(10485760),
-                our_text TEXT,
-                our_text_2 TEXT,
-                our_integer INTEGER,
-                our_smallint SMALLINT,
-                our_bigint BIGINT,
-                our_decimal NUMERIC(12,2),
-                "OUR TS" TIMESTAMP WITHOUT TIME ZONE,
-                "OUR TS TZ" TIMESTAMP WITH TIME ZONE,
-                "OUR TIME" TIME WITHOUT TIME ZONE,
-                "OUR TIME TZ" TIME WITH TIME ZONE,
-                "OUR DATE" DATE,
-                our_double DOUBLE PRECISION,
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
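# A minimal sketch of the per-field comparison pattern referenced above, reusing the
# `messages` and `self.expected_records` variables this test already defines (illustrative
# only; the loops that follow are the actual assertions):
#
#     for field, expected in self.expected_records[0].items():
#         with self.subTest(field=field):
#             self.assertEqual(expected, messages[1]['data'].get(field, "MISSING FIELD"))
#
# Scoping each field to its own subTest keeps a failure in one oversized column, such as
# the CHAR(10485760) value, from swamping the diff for every other field.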
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
-        'our_cidr': 'cidr',
-        'our_inet': 'inet',
-        'our_mac': 'macaddr',
-        'our_alignment_enum': 'alignment',
-        'our_money': 'money',
-        'invalid_bigserial': 'bigint',
-        'invalid_bit_varying': 'bit varying',
-        'invalid_box': 'box',
-        'invalid_bytea': 'bytea',
-        'invalid_circle': 'circle',
-        'invalid_interval': 'interval',
-        'invalid_line': 'line',
-        'invalid_lseg': 'lseg',
-        'invalid_path': 'path',
-        'invalid_pg_lsn': 'pg_lsn',
-        'invalid_point': 'point',
-        'invalid_polygon': 'polygon',
-        'invalid_serial': 'integer',
-        'invalid_smallserial': 'smallint',
-        'invalid_tsquery': 'tsquery',
-        'invalid_tsvector': 'tsvector',
-        'invalid_txid_snapshot': 'txid_snapshot',
-        'invalid_xml': 'xml',
-        }
-
-    @staticmethod
-    def tap_name():
-        return "tap-postgres"
-
-    @staticmethod
-    def name():
-        return "tap_tester_postgres_discovery"
-
-    @staticmethod
-    def get_type():
-        return "platform.postgres"
-
-    @staticmethod
-    def get_credentials():
-        return {'password': os.getenv('TAP_POSTGRES_PASSWORD')}
-
-    def get_properties(self, original_properties=True):
-        return_value = {
-            'host' : os.getenv('TAP_POSTGRES_HOST'),
-            'dbname' : os.getenv('TAP_POSTGRES_DBNAME'),
-            'port' : os.getenv('TAP_POSTGRES_PORT'),
-            'user' : os.getenv('TAP_POSTGRES_USER'),
-            'default_replication_method' : self.FULL_TABLE,
-            'filter_dbs' : 'discovery1'
-        }
-        if not original_properties:
-            if self.default_replication_method is self.LOG_BASED:
-                return_value['wal2json_message_format'] = '1'
-
-            return_value['default_replication_method'] = self.default_replication_method
-
-        return return_value
-
-    def test_run(self):
-        """Parametrized discovery test running against each replication method."""
-
-        self.default_replication_method = self.FULL_TABLE
-        full_table_conn_id = connections.ensure_connection(self, original_properties=False)
-        self.discovery_test(full_table_conn_id)
-
-        self.default_replication_method = self.INCREMENTAL
-        incremental_conn_id = connections.ensure_connection(self, original_properties=False)
-        self.discovery_test(incremental_conn_id)
-
-        # NB | We are able to generate a connection and run discovery with a default replication
-        #      method of logical replication WITHOUT selecting a replication slot. This is not
-        #      ideal behavior. This BUG should not be carried over into hp-postgres, but will not
-        #      be fixed for this tap.
-        self.default_replication_method = self.LOG_BASED
-        log_based_conn_id = connections.ensure_connection(self, original_properties=False)
-        self.discovery_test(log_based_conn_id)
-
-    def discovery_test(self, conn_id):
-        """
-        Basic Discovery Test for a database tap.
-
-        Test Description:
-          Ensure discovery runs without error exit codes and generates a catalog of the expected form
-
-        Test Cases:
-        - Verify discovery generated the expected catalogs by name.
-        - Verify that the table_name is in the expected format for each stream.
-        - Verify the catalog is found for a given stream.
-        - Verify there is only 1 top level breadcrumb in metadata for a given stream.
-        - Verify replication key(s) match expectations for a given stream.
-        - Verify primary key(s) match expectations for a given stream.
-        - Verify the replication method matches our expectations for a given stream.
-        - Verify that only primary keys are given the inclusion of automatic in metadata
-          for a given stream.
-        - Verify expected unsupported fields are given the inclusion of unsupported in
-          metadata for a given stream.
-        - Verify that all fields for a given stream which are not unsupported or automatic
-          have inclusion of available.
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
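# For reference, a rough sketch of the discovered metadata entry the next assertion
# depends on; the dict below is an illustration built from this test's own expectations
# (primary key 'id', sql-datatype 'integer'), not captured tap output:
#
#     {'breadcrumb': ['properties', 'id'],
#      'metadata': {'sql-datatype': 'integer', 'inclusion': 'automatic'}}
#
# Non-key supported columns are expected to carry 'inclusion': 'available', and the
# expected_unsupported_fields() columns 'inclusion': 'unsupported'.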
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
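As an aside, the self-referential sdc_recursive_*_array definitions in expected_schemas above allow a column value to be NULL, a scalar, or an arbitrarily nested array. The following standalone sketch, which is not part of the tap or the test and assumes the jsonschema package, shows how one such definition validates a two-dimensional value like the INTEGER[][] fixture used later in this test:

    from jsonschema import Draft4Validator

    schema = {
        "definitions": {
            "sdc_recursive_integer_array": {
                "type": ["null", "integer", "array"],
                "items": {"$ref": "#/definitions/sdc_recursive_integer_array"},
            }
        },
        "type": "object",
        "properties": {
            "our_int_array": {
                "type": ["null", "array"],
                "items": {"$ref": "#/definitions/sdc_recursive_integer_array"},
            }
        },
    }

    validator = Draft4Validator(schema)
    validator.validate({"our_int_array": [[1, 2, 3], [4, 5, 6]]})  # nested array: valid
    validator.validate({"our_int_array": None})                    # null column: valid

Each nesting level either matches the scalar branch of the type list or recurses into items again, which is why a single definition covers any array depth.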
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
-                # an existing record
-                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
-                record_pk = 1
-                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
-                our_ts_tz = nyc_tz.localize(our_ts)
-                updated_data = {
-                    "OUR TS TZ": our_ts_tz,
-                    "our_double": decimal.Decimal("6.6"),
-                    "our_money": "$0.00"
-                }
-                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
-                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
-                self.expected_records[0]["our_money"] = "$0.00"
-
-                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
-                # a newly inserted record
-                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
-                record_pk = 5
-                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
-                our_ts_tz = nyc_tz.localize(our_ts)
-                updated_data = {
-                    "OUR TS TZ": our_ts_tz,
-                    "our_double": decimal.Decimal("6.6"),
-                    "our_money": "$0.00"
-                }
-                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
-                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
-                self.expected_records[4]["our_money"] = "$0.00"
-
-                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
-
-                # deleting
-                # an existing record
-                record_pk = 2
-                db_utils.delete_record(cur, canon_table_name, record_pk)
-
-                # a newly inserted record
-                record_pk = 6
-                db_utils.delete_record(cur, canon_table_name, record_pk)
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN after various manipulations
-        #----------------------------------------------------------------------
-
-        # run sync job 3 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_3 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(4, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(5, len(messages))
-        self.assertEqual('upsert', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('upsert', messages[3]['action'])
-        self.assertEqual('activate_version', messages[4]['action'])
-
-        # verify the new table version increased on the third sync
-        self.assertGreater(table_version_3, table_version_2)
-
-        # verify the persisted schema still matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-
-        # NB | This is a little tough to track mentally so here's a breakdown of
-        #      the order of operations by expected records indexes:
-
-        #      Prior to Sync 1
-        #        insert 0, 1, 2
-
-        #      Prior to Sync 2
-        #        No db changes
-
-        #      Prior to Sync 3
-        #        insert 3, 4, 5
-        #        update 0, 4
-        #        delete 1, 5
-
-        #      Resulting Synced Records: 2, 3, 0, 4
-
-
-        # verify replicated records still match expectations
-        self.assertDictEqual(self.expected_records[2], messages[0]['data'])  # existing insert
-        self.assertDictEqual(self.expected_records[3], messages[1]['data'])  # new insert
-
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
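# A minimal sketch of the normalization the expected_ts()/expected_ts_tz()
# helpers above perform: timezone-aware fixture values are converted to UTC
# before formatting, so the expected strings stay correct across DST
# boundaries for America/New_York. Names below are local to this sketch.
import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
local_ts = nyc_tz.localize(datetime.datetime(1977, 3, 3, 3, 3, 3, 733184))   # EST, UTC-5
expected = datetime.datetime.strftime(local_ts.astimezone(pytz.utc),
                                      "%Y-%m-%dT%H:%M:%S.%f+00:00")
assert expected == '1977-03-03T08:03:03.733184+00:00'   # the value expected for record 1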
-
-        # verify discovery produced (at least) 1 expected catalog
-        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
-                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
-        self.assertGreaterEqual(len(found_catalogs), 1)
-
-        # verify the tap discovered the expected streams
-        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
-        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
-        # verify that persisted streams have the correct properties
-        test_catalog = found_catalogs[0]
-        self.assertEqual(test_table_name, test_catalog['stream_name'])
-        print("discovered streams are correct")
-
-        # perform table selection
-        print('selecting {} and all fields within the table'.format(test_table_name))
-        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
-        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
-        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        # run sync job 1 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(3, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(4, len(messages))
-        self.assertEqual('activate_version', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('upsert', messages[3]['action'])
-
-        # verify the persisted schema matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-        # verify replicated records match expectations
-        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
-        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
-        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
-
-        # verify records are in ascending order by replication-key value
-        expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
-        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
-        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
-
-        print("records are correct")
-
-        # grab bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
-
-        # verify state and bookmarks meet expectations
-        self.assertIsNone(state['currently_syncing'])
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertEqual(table_version, bookmark['version'])
-        self.assertEqual(expected_replication_key, bookmark['replication_key'])
-        self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
-
-
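# For reference, a hedged example of the state shape those bookmark assertions
# check (field names come straight from the assertions; the version number is
# illustrative, not captured from a real run):
example_state = {
    'currently_syncing': None,
    'bookmarks': {
        'dev-public-postgres_incremental_replication_test': {
            'version': 1500000000000,   # table_version from the sync, unchanged between runs
            'replication_key': 'OUR TS TZ',
            'replication_key_value': '1997-02-02T07:02:02.722184+00:00',
            # no 'lsn' entry: that bookmark field is only used by LOG_BASED streams
        }
    }
}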
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
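# For reference, a sketch of the message sequence the following assertions
# expect from sync 2 (data payloads abbreviated; the real messages carry the
# full expected_records dicts):
expected_message_shape = [
    {'action': 'activate_version'},
    {'action': 'upsert', 'data': '<record 3, the bookmarked record from sync 1>'},
    {'action': 'upsert', 'data': '<record 1, updated to a higher replication-key value>'},
    {'action': 'upsert', 'data': '<record 6, inserted with a higher replication-key value>'},
]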
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
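# A minimal sketch (not the tap's actual implementation) of the replication-key
# windowing that explains the single-record result asserted below: selection is
# inclusive of the bookmarked value, so after record 1 is deleted only the
# previously bookmarked record 6 still satisfies the filter.
def incremental_window(rows, replication_key, bookmark_value=None):
    """Return (rows_to_emit, new_bookmark) for one simulated incremental sync."""
    selected = [row for row in rows
                if bookmark_value is None or row[replication_key] >= bookmark_value]
    selected.sort(key=lambda row: row[replication_key])
    new_bookmark = selected[-1][replication_key] if selected else bookmark_value
    return selected, new_bookmark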
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
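The three delete phases above repeat the same tombstone checks for every removed row. A minimal helper capturing that pattern (a sketch only; assert_deleted is a hypothetical name, not part of this patch) could look like:

def assert_deleted(case, message, expected_id):
    # Under logical replication a DELETE surfaces as an 'upsert' message
    # whose _sdc_deleted_at column carries the deletion timestamp.
    case.assertEqual(message['action'], 'upsert')
    case.assertIsNotNone(message['data'].get('_sdc_deleted_at'))
    case.assertEqual(message['data']['id'], expected_id)

Called as assert_deleted(self, delete_message, 3), it would stand in for the three inline assertions made after each DELETE sync.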
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
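Each sync in this format-2 test, like the format-1 test above, closes with the same bookmark contract: currently_syncing is cleared, the LSN never moves backwards, and the table version stays fixed. A compact restatement of that check (hypothetical helper name, not part of the patch):

def assert_bookmark_progressed(case, state, stream_id, previous_lsn, expected_version):
    # the tap clears currently_syncing once the sync completes
    case.assertIsNone(state['currently_syncing'])
    bookmark = state['bookmarks'][stream_id]
    # the LSN only ever moves forward, and activate_version is not re-sent,
    # so the table version recorded in the bookmark never changes
    case.assertIsNotNone(bookmark['lsn'])
    case.assertGreaterEqual(bookmark['lsn'], previous_lsn)
    case.assertEqual(bookmark['version'], expected_version)
    return bookmark['lsn']

For example: lsn_2 = assert_bookmark_progressed(self, menagerie.get_state(conn_id), 'dev-public-postgres_logical_replication_test', lsn_1, table_version).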
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
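The UPDATE above intentionally writes the non-finite values 'NaN' and '+Infinity'; the expected record checked below treats those columns as null, since JSON has no encoding for non-finite numbers. A minimal sketch of that normalization, assuming non-finite numerics are simply dropped (the function is illustrative only, not the tap's implementation):

    import math
    from decimal import Decimal

    def normalize_nonfinite(value):
        """Return None for NaN or infinite numeric values; pass everything else through."""
        if isinstance(value, (float, Decimal)) and not math.isfinite(value):
            return None
        return value

    assert normalize_nonfinite(Decimal('NaN')) is None
    assert normalize_nonfinite(float('+inf')) is None
    assert normalize_nonfinite(Decimal('56.81')) == Decimal('56.81')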
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
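Because a view has no primary key of its own, the test above supplies one through 'view-key-properties' in the selection metadata and then checks the characteristic full-table message shape: an activate_version, the upserts, and a closing activate_version. A small self-contained sketch of that shape check, using a hand-written message list instead of real target output:

    def assert_full_table_message_shape(messages):
        """Illustrative helper: full-table syncs bracket their upserts with activate_version messages."""
        actions = [message['action'] for message in messages]
        assert actions[0] == 'activate_version'
        assert actions[-1] == 'activate_version'
        assert all(action == 'upsert' for action in actions[1:-1])

    assert_full_table_message_shape([
        {'action': 'activate_version'},
        {'action': 'upsert', 'data': {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size': 'big'}},
        {'action': 'activate_version'},
    ])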
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
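    # updated_at is the column this incremental-views test later selects as the replication key
    # for chicken_view; the bookmark assertions further down expect its value rendered as a UTC
    # ISO-8601 string, e.g. '2111-01-01T12:12:12.222111+00:00'.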
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
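# Editor's illustrative sketch (not part of the patch): a minimal, self-contained
# example of the per-field subTest pattern the NB comment above refers to.
# assertDictEqual on these records would dump the multi-megabyte padded
# CHAR/VARCHAR values into a single failure diff; asserting field by field inside
# subTest keeps each failure small and reports every mismatching field separately.
# The class name and record values here are hypothetical stand-ins.
import unittest

class PerFieldComparisonSketch(unittest.TestCase):
    def test_fields_match(self):
        expected = {'id': 1, 'our_char_big': 'a' + ' ' * 9}   # stand-in for a hugely padded CHAR column
        actual = {'id': 1, 'our_char_big': 'a' + ' ' * 9}
        for field, value in expected.items():
            # one subTest per field: a failure names the field without printing the whole record
            with self.subTest(field=field):
                self.assertEqual(value, actual.get(field, "MISSING FIELD"))

if __name__ == '__main__':
    unittest.main()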
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
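# Editor's illustrative sketch (not part of the patch): the discovery metadata parsed
# above is assumed to be a list of entries shaped roughly like the sample below --
# one entry with an empty breadcrumb carrying stream-level properties, plus one
# ["properties", <column>] entry per field carrying its inclusion and sql-datatype.
# Concrete values (row-count 500, db "discovery1", schema "public") mirror the
# fixtures inserted in setUp; the sample itself is hypothetical.
sample_stream_metadata = [
    {"breadcrumb": [], "metadata": {"table-key-properties": ["id"],
                                    "schema-name": "public",
                                    "database-name": "discovery1",
                                    "is-view": False,
                                    "row-count": 500}},
    {"breadcrumb": ["properties", "id"],
     "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
    {"breadcrumb": ["properties", "our_varchar"],
     "metadata": {"inclusion": "available", "sql-datatype": "character varying"}},
    {"breadcrumb": ["properties", "invalid_xml"],
     "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}},
]

# deriving automatic fields the same way the test does: only the primary key
# should come back with inclusion "automatic", matching the assertion below
automatic_fields = {entry["breadcrumb"][1] for entry in sample_stream_metadata
                    if entry["breadcrumb"] and entry["metadata"]["inclusion"] == "automatic"}
assert automatic_fields == {"id"}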
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
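The action list above is easier to follow with the full-table contract in mind. A minimal sketch under assumed names (full_table_sync is illustrative, not tap-postgres code): every sync re-reads the whole table, so deleted rows simply stop appearing, updated rows come back with their current values, and only the first sync brackets the upserts with a leading activate_version.

    def full_table_sync(current_rows, first_sync):
        # Emit one upsert per row currently in the table; deletes are implicit
        # because a removed row is just absent from current_rows.
        messages = [{'action': 'activate_version'}] if first_sync else []
        messages += [{'action': 'upsert', 'data': row} for row in current_rows]
        messages.append({'action': 'activate_version'})
        return messages

    # After the inserts, updates, and deletes planned above, ids 2 and 6 are gone
    # and ids 3, 4, 1, 5 remain, matching the 4 upserts asserted for sync 3.
    messages = full_table_sync([{'id': 3}, {'id': 4}, {'id': 1}, {'id': 5}], first_sync=False)
    assert [m['action'] for m in messages] == ['upsert'] * 4 + ['activate_version']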
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
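The updates below go through db_utils.update_record, whose implementation is not part of this patch. A hypothetical sketch of a helper with the same call shape, shown only to illustrate why a mixed-case column such as "OUR TS TZ" needs quote_ident when the SET clause is built:

    from psycopg2.extensions import quote_ident

    def update_record_sketch(cur, canon_table_name, record_pk, updated_data):
        # Assumes canon_table_name arrives already quoted, as its name suggests;
        # each column still needs quote_ident so "OUR TS TZ" keeps its casing.
        assignments = ", ".join(
            "{} = %s".format(quote_ident(column, cur)) for column in updated_data
        )
        sql = "UPDATE {} SET {} WHERE id = %s".format(canon_table_name, assignments)
        cur.execute(sql, list(updated_data.values()) + [record_pk])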
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
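As a quick aside before the discovery checks: a worked example of the expected_ts and expected_ts_tz helpers defined above, using the record-2 timestamp from this file's fixtures (pytz and datetime are already imported by the test).

    import datetime
    import pytz

    nyc_tz = pytz.timezone('America/New_York')
    our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184)
    our_ts_tz = nyc_tz.localize(our_ts)

    # Naive TIMESTAMP values are formatted as-is and labelled UTC:
    print(datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00"))
    # 1987-02-02T02:02:02.722184+00:00

    # TIMESTAMP WITH TIME ZONE values are normalized to UTC first; New York is
    # UTC-5 in February, so 02:02 local becomes the 07:02 seen in expected record 2:
    print(datetime.datetime.strftime(our_ts_tz.astimezone(pytz.utc), "%Y-%m-%dT%H:%M:%S.%f+00:00"))
    # 1987-02-02T07:02:02.722184+00:00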
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
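Before the data manipulations for the second sync, here is a minimal sketch of the bookmark rule the remaining assertions exercise. fetch_incremental and the literal rows are illustrative only (the ">= bookmark" predicate is an assumption inferred from the asserted behaviour, not code shown in this patch): rows at or above the saved bookmark come back in ascending replication-key order, so the previously bookmarked row is re-emitted while lower-keyed inserts and updates are not, and deletes go unnoticed.

    from datetime import datetime, timezone

    def fetch_incremental(rows, replication_key, bookmark_value):
        # Keep rows at or above the bookmark, ordered by the key; the new
        # bookmark is the highest key value that was emitted.
        selected = sorted(
            (r for r in rows if bookmark_value is None or r[replication_key] >= bookmark_value),
            key=lambda r: r[replication_key],
        )
        new_bookmark = selected[-1][replication_key] if selected else bookmark_value
        return selected, new_bookmark

    # Shape of the table before sync 2 (UTC values simplified to whole hours;
    # id 5 is inserted and then deleted below, so it never appears here):
    rows = [
        {'id': 1, 'OUR TS TZ': datetime(2021, 4, 4, 8, tzinfo=timezone.utc)},   # updated above the bookmark
        {'id': 2, 'OUR TS TZ': datetime(1990, 4, 4, 8, tzinfo=timezone.utc)},   # updated below the bookmark
        {'id': 3, 'OUR TS TZ': datetime(1997, 2, 2, 7, tzinfo=timezone.utc)},   # the bookmarked record
        {'id': 4, 'OUR TS TZ': datetime(1996, 4, 4, 8, tzinfo=timezone.utc)},   # new insert below the bookmark
        {'id': 6, 'OUR TS TZ': datetime(2111, 1, 1, 16, tzinfo=timezone.utc)},  # new insert above the bookmark
    ]
    selected, bookmark = fetch_incremental(rows, 'OUR TS TZ', datetime(1997, 2, 2, 7, tzinfo=timezone.utc))
    assert [r['id'] for r in selected] == [3, 1, 6]   # matches the three upserts asserted below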
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
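The dict-equality checks that follow lean on the value coercions already encoded in the fixtures above; this is simply a restatement of those inserted/expected pairs, not a specification of the tap's behaviour.

    import datetime

    coercions = {
        'our_store': ('dances=>"floor",name=>"betty"', {'name': 'betty', 'dances': 'floor'}),  # hstore text -> dict
        'our_money': ('$0.98789', '$0.99'),                     # money rounded to cents
        'our_bit':   ('1', True),                               # BIT(1) -> boolean
        'OUR TIME':  (datetime.time(12, 11, 10), '12:11:10'),   # TIME -> string
    }
    for column, (inserted, expected) in coercions.items():
        print('{}: {!r} -> {!r}'.format(column, inserted, expected))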
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
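# --- editor's sketch (not part of the patch above) --------------------------
# The inserts above repeat one pattern: write a fixture dict with
# insert_record(), then reuse that same dict as the expected replicated
# record by adding the SERIAL primary key and the _sdc_deleted_at column the
# tap emits for LOG_BASED streams. A minimal, hypothetical helper showing the
# pattern (insert_and_expect is not a name used by this test suite):
def insert_and_expect(cur, table_name, record, expected_id):
    insert_record(cur, table_name, record)   # parameterized INSERT helper defined in this file
    expected = dict(record)                  # copy the fixture values
    expected['id'] = expected_id             # id assigned by the SERIAL column
    expected['_sdc_deleted_at'] = None       # added by the tap; stays None until a delete
    return expected
# Example use, mirroring the cow/chicken records in this test:
#     self.cows_rec_2 = insert_and_expect(cur, test_table_name_cows,
#                                         {'cow_name': 'betty cow', 'cow_age': 21}, 2)
# -----------------------------------------------------------------------------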
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
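The three state checks above (currently_syncing is cleared, the lsn bookmark only moves forward, and the bookmark version stays pinned to the table_version captured by the initial sync) are repeated after every sync in this test. The sketch below is not part of the patch; it only shows how that recurring block reads when factored into a hypothetical helper.

# A minimal sketch, assuming a unittest.TestCase instance is passed in;
# `assert_lsn_bookmark` is a hypothetical name, not a helper used by these tests.
def assert_lsn_bookmark(test, state, tap_stream_id, previous_lsn, table_version):
    bookmark = state['bookmarks'][tap_stream_id]
    # the tap must not report an in-flight stream once the sync job has exited
    test.assertIsNone(state['currently_syncing'])
    # log-based bookmarks carry an lsn, and it never moves backwards
    test.assertIsNotNone(bookmark['lsn'])
    test.assertGreaterEqual(bookmark['lsn'], previous_lsn)
    # incremental WAL reads do not bump the table version
    test.assertEqual(bookmark['version'], table_version)
    return bookmark['lsn']

Each of the blocks above would then collapse to a single call such as lsn_2 = assert_lsn_bookmark(self, menagerie.get_state(conn_id), 'dev-public-postgres_logical_replication_test', lsn_1, table_version).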
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
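Every record verification above follows the same two-step pattern: compare the key sets first (so missing or extra columns are reported explicitly), then compare values key by key (so the failure message names the offending column). A sketch of that pattern as a hypothetical helper, not code that exists in this patch:

def assert_record_equal(test, actual, expected):
    # surface missing or extra columns before comparing any values
    test.assertEqual(set(actual.keys()), set(expected.keys()),
                     msg="unexpected keys: {}".format(
                         set(actual.keys()).symmetric_difference(set(expected.keys()))))
    # compare column by column so the assertion error names the bad key
    for key, value in actual.items():
        test.assertEqual(value, expected[key],
                         msg="{} != {} for key {}".format(value, expected[key], key))

With such a helper, the comparison of actual_record_1 against expected_inserted_record above becomes a single call.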
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
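The UPDATE just issued deliberately writes 'NaN' into our_decimal and our_double and '+Infinity' into our_real. JSON has no encoding for those values, so the expected record that follows checks them as None (and the '$56.811' money value as '$56.81', since money keeps two decimal places). A brief illustration of the JSON constraint, not part of the patch:

import json

# Singer RECORD messages are JSON, and strict JSON cannot carry NaN/Infinity,
# so the tap is expected to emit these columns as null (None on the Python side).
try:
    json.dumps(float('nan'), allow_nan=False)
except ValueError as err:
    print(err)  # "Out of range float values are not JSON compliant"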
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
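The stream identifiers asserted throughout these tests ('postgres-public-chicken_view' here, 'dev-public-postgres_logical_replication_test' in the log-based tests) follow a "<database>-<schema>-<table>" naming convention, which is why the bookmark keys change with the database a test targets. A small sketch of that convention, shown for reference only and assuming none of the parts contain a hyphen:

def tap_stream_id(dbname, schema, table):
    # mirrors the ids these tests expect back from discovery
    return "{}-{}-{}".format(dbname, schema, table)

assert tap_stream_id('postgres', 'public', 'chicken_view') == 'postgres-public-chicken_view'
assert tap_stream_id('dev', 'public', 'postgres_logical_replication_test') == \
    'dev-public-postgres_logical_replication_test'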
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
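# A quick worked check of the 'our_decimal' expectation in expected_schemas above,
# assuming (as that dict does) that the JSON-schema bounds are derived directly from
# the column's NUMERIC(precision, scale) definition -- a sketch, not part of the test:
#
#   NUMERIC_PRECISION, NUMERIC_SCALE = 12, 2
#   multiple_of = 10 ** (0 - NUMERIC_SCALE)            # 0.01
#   bound = 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE)  # 10_000_000_000, exclusive
#
# i.e. a NUMERIC(12,2) value is expected to be a multiple of 0.01 with an absolute
# value strictly below 10**10, which is exactly what the schema dict encodes via
# multipleOf, maximum/minimum, and exclusiveMaximum/exclusiveMinimum.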
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
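# A minimal sketch of the comparison pattern described in the comment above
# (hypothetical helper, not part of this test): with two inserted records plus the
# surrounding activate_version messages, messages[1] holds the minimum-value record
# and messages[2] the maximum-value record, so each expected dict can be checked
# field by field under its own subTest instead of one assertDictEqual:
#
#   def assert_record_matches(self, expected, actual):
#       for field, expected_value in expected.items():
#           with self.subTest(field=field):
#               self.assertEqual(expected_value, actual.get(field, "MISSING FIELD"))
#
#   # e.g. self.assert_record_matches(self.expected_records[0], messages[1]['data'])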
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
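# For orientation, the shape of metadata entry the assertions below rely on
# (a rough sketch inferred from the checks in this test, not verbatim tap output):
#
#   {"breadcrumb": ["properties", "id"],
#    "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}}
#
# Only the primary key is expected to carry inclusion "automatic" here, which is why
# actual_automatic_fields should equal expected_primary_keys in the assertion below.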
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
-
-        # verify discovery produced (at least) 1 expected catalog
-        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
-                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
-        self.assertGreaterEqual(len(found_catalogs), 1)
-
-        # verify the tap discovered the expected streams
-        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
-        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
-        # verify that persisted streams have the correct properties
-        test_catalog = found_catalogs[0]
-        self.assertEqual(test_table_name, test_catalog['stream_name'])
-        print("discovered streams are correct")
-
-        # perform table selection
-        print('selecting {} and all fields within the table'.format(test_table_name))
-        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
-        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
-        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        # run sync job 1 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_1 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(3, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(5, len(messages))
-        self.assertEqual('activate_version', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('upsert', messages[3]['action'])
-        self.assertEqual('activate_version', messages[4]['action'])
-
-        # verify the persisted schema matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-        # verify replicated records match expectations
-        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
-        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
-        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
-
-        print("records are correct")
-
-        # grab bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
-
-        # verify state and bookmarks meet expectations
-        self.assertIsNone(state['currently_syncing'])
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertIsNone(bookmark.get('replication_key'))
-        self.assertIsNone(bookmark.get('replication_key_value'))
-        self.assertEqual(table_version_1, bookmark['version'])
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN and get the same 3 records
-        #----------------------------------------------------------------------
-
-        # run sync job 2 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_2 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(3, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(4, len(messages))
-        self.assertEqual('upsert', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('activate_version', messages[3]['action'])
-
-        # verify the new table version increased on the second sync
-        self.assertGreater(table_version_2, table_version_1)
-
-        # verify the persisted schema still matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-        # verify replicated records still match expectations
-        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
-        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
-        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
-
-        # grab bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
-
-        # verify state and bookmarks meet expectations
-        self.assertIsNone(state['currently_syncing'])
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertIsNone(bookmark.get('replication_key'))
-        self.assertIsNone(bookmark.get('replication_key_value'))
-        self.assertEqual(table_version_2, bookmark['version'])
-
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN following various manipulations to the data
-        #----------------------------------------------------------------------
-
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-
-                # NB | We will perform the following actions prior to the next sync:
-                # [Action (EXPECTED RESULT)]
-
-                # Insert a record
-                # Insert a record to be updated prior to sync
-                # Insert a record to be deleted prior to sync (NOT REPLICATED)
-
-                # Update an existing record
-                # Update a newly inserted record
-
-                # Delete an existing record
-                # Delete a newly inserted record
-
-                # inserting...
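A small sketch of the bookkeeping this plan implies, assuming the three setUp rows hold ids 1-3 and the inserts below receive ids 4-6 from the SERIAL primary key:

# Rows a subsequent full-table sync should replicate after the manipulations.
existing = {1, 2, 3}       # inserted during setUp
new_inserts = {4, 5, 6}    # inserted prior to sync 3
deleted = {2, 6}           # deleted prior to sync 3 (not replicated)
updated = {1, 5}           # updated prior to sync 3 (replicated with new values)
assert sorted((existing | new_inserts) - deleted) == [1, 3, 4, 5]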
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
-                # an existing record
-                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
-                record_pk = 1
-                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
-                our_ts_tz = nyc_tz.localize(our_ts)
-                updated_data = {
-                    "OUR TS TZ": our_ts_tz,
-                    "our_double": decimal.Decimal("6.6"),
-                    "our_money": "$0.00"
-                }
-                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
-                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
-                self.expected_records[0]["our_money"] = "$0.00"
-
-                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
-                # a newly inserted record
-                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
-                record_pk = 5
-                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
-                our_ts_tz = nyc_tz.localize(our_ts)
-                updated_data = {
-                    "OUR TS TZ": our_ts_tz,
-                    "our_double": decimal.Decimal("6.6"),
-                    "our_money": "$0.00"
-                }
-                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
-                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
-                self.expected_records[4]["our_money"] = "$0.00"
-
-                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
-
-                # deleting
-                # an existing record
-                record_pk = 2
-                db_utils.delete_record(cur, canon_table_name, record_pk)
-
-                # a newly inserted record
-                record_pk = 6
-                db_utils.delete_record(cur, canon_table_name, record_pk)
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN after various manipulations
-        #----------------------------------------------------------------------
-
-        # run sync job 3 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_3 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(4, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(5, len(messages))
-        self.assertEqual('upsert', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('upsert', messages[3]['action'])
-        self.assertEqual('activate_version', messages[4]['action'])
-
-        # verify the new table version increased on the third sync
-        self.assertGreater(table_version_3, table_version_2)
-
-        # verify the persisted schema still matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-
-        # NB | This is a little tough to track mentally so here's a breakdown of
-        # the order of operations by expected records indexes:
-
-        # Prior to Sync 1
-        # insert 0, 1, 2
-
-        # Prior to Sync 2
-        # No db changes
-
-        # Prior to Sync 3
-        # insert 3, 4, 5
-        # update 0, 4
-        # delete 1, 5
-
-        # Resulting Synced Records: 2, 3, 0, 4
-
-
-        # verify replicated records still match expectations
-        self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
-        self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
-
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
-
- # verify discovery produced (at least) 1 expected catalog
- found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
- if found_catalog['tap_stream_id'] in self.expected_check_streams()]
- self.assertGreaterEqual(len(found_catalogs), 1)
-
- # verify the tap discovered the expected streams
- found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
- self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
- # verify that persisted streams have the correct properties
- test_catalog = found_catalogs[0]
- self.assertEqual(test_table_name, test_catalog['stream_name'])
- print("discovered streams are correct")
-
- # perform table selection
- print('selecting {} and all fields within the table'.format(test_table_name))
- schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
- additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
- _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
-
- # clear state
- menagerie.set_state(conn_id, {})
-
- # run sync job 1 and verify exit codes
- sync_job_name = runner.run_sync_mode(self, conn_id)
- exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
- menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
- # get records
- record_count_by_stream = runner.examine_target_output_file(
- self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
- )
- records_by_stream = runner.get_records_from_target_output()
- table_version = records_by_stream[test_table_name]['table_version']
- messages = records_by_stream[test_table_name]['messages']
-
- # verify the expected number of records were replicated
- self.assertEqual(3, record_count_by_stream[test_table_name])
-
- # verify the message actions match expectations
- self.assertEqual(4, len(messages))
- self.assertEqual('activate_version', messages[0]['action'])
- self.assertEqual('upsert', messages[1]['action'])
- self.assertEqual('upsert', messages[2]['action'])
- self.assertEqual('upsert', messages[3]['action'])
-
- # verify the persisted schema matches expectations
- self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
- # verify replicated records match expectations
- self.assertDictEqual(self.expected_records[0], messages[1]['data'])
- self.assertDictEqual(self.expected_records[1], messages[2]['data'])
- self.assertDictEqual(self.expected_records[2], messages[3]['data'])
-
- # verify records are in ascending order by replication-key value
- expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
- self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
- self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
-
- print("records are correct")
-
- # grab bookmarked state
- state = menagerie.get_state(conn_id)
- bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
-
- # verify state and bookmarks meet expectations
- self.assertIsNone(state['currently_syncing'])
- self.assertIsNone(bookmark.get('lsn'))
- self.assertEqual(table_version, bookmark['version'])
- self.assertEqual(expected_replication_key, bookmark['replication_key'])
- self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
-
-
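# ----------------------------------------------------------------------
# Editorial sketch (not part of tap-postgres or the test harness): the
# assertions above and the data manipulations below rely on the
# INCREMENTAL selection rule -- a sync emits only rows whose
# replication-key value is greater than or equal to the bookmark from
# the previous sync, ordered by that key, and the bookmark then advances
# to the largest emitted value. The function and names here are
# illustrative only.

def incremental_sync(rows, replication_key, bookmark_value=None):
    """Return (emitted_rows, new_bookmark) for one hypothetical sync pass."""
    emitted = sorted(
        (row for row in rows
         if bookmark_value is None or row[replication_key] >= bookmark_value),
        key=lambda row: row[replication_key],
    )
    new_bookmark = emitted[-1][replication_key] if emitted else bookmark_value
    return emitted, new_bookmark

# Because the comparison is inclusive (>=), the row that set the previous
# bookmark is re-emitted on the next sync, while inserts and updates that
# land below the bookmark are skipped -- which is what the second- and
# third-sync assertions in this test check.
# ----------------------------------------------------------------------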
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
-
- # verify the first record was the bookmarked record from the previous sync
- self.assertDictEqual(self.expected_records[2], messages[1]['data'])
-
- # verify the expected updated record with a higher replication-key value was replicated
- self.assertDictEqual(self.expected_records[0], messages[2]['data'])
-
- # verify the expected inserted record with a lower replication-key value was NOT replicated
- actual_record_ids = [message['data']['id'] for message in messages[1:]]
- expected_record_id = self.expected_records[3]['id']
- self.assertNotIn(expected_record_id, actual_record_ids)
-
- # verify the deleted record with a higher replication-key value was NOT replicated
- expected_record_id = self.expected_records[4]['id']
- self.assertNotIn(expected_record_id, actual_record_ids)
-
- # verify the expected updated record with a lower replication-key value was NOT replicated
- expected_record_id = self.expected_records[1]['id']
- self.assertNotIn(expected_record_id, actual_record_ids)
-
- # verify the expected inserted record with a higher replication-key value was replicated
- self.assertDictEqual(self.expected_records[5], messages[3]['data'])
-
- # verify records are in ascending order by replication-key value
- self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
- self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
-
- print("records are correct")
-
- # get bookmarked state
- state = menagerie.get_state(conn_id)
- bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
-
- # verify the bookmarked state matches our expectations
- self.assertIsNone(bookmark.get('lsn'))
- self.assertEqual(bookmark['version'], table_version)
- self.assertEqual(bookmark['replication_key'], expected_replication_key)
- self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key])
-
- #----------------------------------------------------------------------
- # run sync AGAIN after deleting a record and get 1 record (prev bookmark)
- #----------------------------------------------------------------------
-
- # Delete a pre-existing record from the database
- with db_utils.get_test_connection('dev') as conn:
- conn.autocommit = True
- with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-
- # delete a record with a lower replication key than the previous sync
- record_pk = 1
- db_utils.delete_record(cur, canon_table_name, record_pk)
-
- # run sync job 3 and verify exit codes
- sync_job_name = runner.run_sync_mode(self, conn_id)
- exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
- menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
- # get records
- record_count_by_stream = runner.examine_target_output_file(
- self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
- )
- records_by_stream = runner.get_records_from_target_output()
- messages = records_by_stream[test_table_name]['messages']
-
- # verify the expected number of records were replicated
- self.assertEqual(1, record_count_by_stream[test_table_name])
-
- # verify messages match our expectations
- self.assertEqual(2, len(messages))
- self.assertEqual(messages[0]['action'], 'activate_version')
- self.assertEqual(messages[1]['action'], 'upsert')
- self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version)
-
- # verify replicated records meet our expectations...
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
- self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
- self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
-
- # verify state and bookmarks
- state = menagerie.get_state(conn_id)
- self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-
- bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
- self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
- lsn_cows_1 = bookmark_cows['lsn']
- self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
-
- bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
- self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
- lsn_chickens_1 = bookmark_chickens['lsn']
- self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
-
-
- #----------------------------------------------------------------------
- # invoke the sync job again after adding records
- #----------------------------------------------------------------------
- print("inserting 2 more cows and 2 more chickens")
-
- with db_utils.get_test_connection('dev') as conn:
- conn.autocommit = True
- with conn.cursor() as cur:
- # insert another cow
- self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
- insert_record(cur, test_table_name_cows, self.cows_rec_2)
- # update that cow's expected values
- self.cows_rec_2['id'] = 2
- self.cows_rec_2['_sdc_deleted_at'] = None
-
- # insert another chicken
- self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
- insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
- # update that chicken's expected values
- self.chicken_rec_2['id'] = 2
- self.chicken_rec_2['_sdc_deleted_at'] = None
-
- # and repeat...
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
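Each of the lsn bookmark checks above only asserts monotonicity: the value recorded after a sync must be greater than or equal to the one recorded before it. If one ever wants to compare a bookmark against the textual LSN Postgres reports (for example from pg_current_wal_lsn(), which prints a hex pair such as 0/16B3748), the text form has to be folded into a single integer first. A small sketch; lsn_to_int is not part of the suite:

    def lsn_to_int(lsn_text):
        # pg_lsn text is two hex words separated by '/': the high and low
        # 32 bits of the 64-bit byte position in the WAL stream.
        hi, lo = lsn_text.split('/')
        return (int(hi, 16) << 32) + int(lo, 16)

    assert lsn_to_int('0/16B3748') < lsn_to_int('1/0')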
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
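The our_store column is written as hstore text ('jumps=>"high",name=>"betty"'), yet the record asserted above comes back as a Python dict. When poking at the table by hand it can be handy to get the same mapping straight from psycopg2, which ships an hstore adapter; the DSN below is a placeholder and this snippet is not part of the suite:

    import psycopg2
    import psycopg2.extras

    conn = psycopg2.connect("dbname=dev")     # assumes the test database with the hstore extension installed
    psycopg2.extras.register_hstore(conn)     # hstore values now come back as dicts
    with conn.cursor() as cur:
        cur.execute("""SELECT 'jumps=>"high",name=>"betty"'::hstore""")
        print(cur.fetchone()[0])              # {'jumps': 'high', 'name': 'betty'}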
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
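The UPDATE above deliberately writes values that cannot survive a round trip through JSON: numeric 'NaN', real '+Infinity' and double 'NaN'. JSON has no representation for non-finite numbers, which is presumably why the expected record below carries None for our_decimal, our_real and our_double; the money column is stored with two decimal places (under the default locale), so '$56.811' comes back as '$56.81'. A hedged sketch of the normalization one could apply when building such expected records by hand; jsonable_number is not part of the suite:

    import math
    from decimal import Decimal

    def jsonable_number(value):
        # Non-finite floats/Decimals cannot be expressed in JSON, so expect null instead.
        if isinstance(value, Decimal):
            return value if value.is_finite() else None
        if isinstance(value, float):
            return value if math.isfinite(value) else None
        return value

    assert jsonable_number(Decimal('NaN')) is None
    assert jsonable_number(float('+inf')) is None
    assert jsonable_number(Decimal('56.81')) == Decimal('56.81')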
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
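The three view tests that follow all work around discovery reporting 'is-view': True with empty 'table-key-properties': each one pushes a 'view-key-properties' entry (plus the replication method, and a replication key for the incremental case) through the stream metadata before selecting fields, and the LOG_BASED variant is then expected to exit non-zero because logical replication is not supported for views. A sketch consolidating the three metadata payloads used below:

    def view_replication_metadata(method, replication_key=None):
        # method is one of "FULL_TABLE", "INCREMENTAL", or "LOG_BASED";
        # replication_key is only set for INCREMENTAL (e.g. "updated_at").
        return [{
            "breadcrumb": [],
            "metadata": {
                "replication-method": method,
                "replication-key": replication_key,
                "view-key-properties": ["id"],   # stands in for the missing primary key
            },
        }]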
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
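        # Note: the 'our_decimal' schema in expected_schemas above follows directly
        # from NUMERIC(12,2): multipleOf is 10**-scale and the open bounds are
        # +/- 10**(precision - scale). A quick illustration reusing the module-level
        # constants (not part of the fixture setup):
        #
        #   step  = decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE)))    # Decimal('0.01')
        #   bound = 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE)          # 10**10
        #   assert expected_schemas[test_table_name]['properties']['our_decimal'] == {
        #       'type': ['null', 'number'], 'multipleOf': step,
        #       'maximum': bound, 'exclusiveMaximum': True,
        #       'minimum': -bound, 'exclusiveMinimum': True}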
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
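The NB comment above is the reason the per-field loops below use unittest's subTest rather than a single assertDictEqual. As a minimal, self-contained sketch of that pattern (the record contents here are invented placeholders, not fixtures from this patch):

import unittest

class SubTestSketch(unittest.TestCase):
    """Illustration of the per-field comparison pattern; names are placeholders."""

    def test_record_values(self):
        # real fixtures pad CHAR(10485760) columns to ~10MB; a small stand-in here
        expected = {'id': 1, 'our_char_big': 'a' + ' ' * 999, 'our_bit': False}
        actual = {'id': 1, 'our_char_big': 'a' + ' ' * 999, 'our_bit': False}

        # assertDictEqual would print both huge dicts on a mismatch; subTest
        # reports each failing field on its own and keeps checking the remaining
        # fields instead of stopping at the first failure.
        for key in expected.keys():
            with self.subTest(field=key):
                self.assertEqual(expected[key], actual.get(key, "MISSING FIELD"))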
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
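The metadata shape these discovery assertions walk over is only implicit in the test, so here is a small sketch of the annotated-schema structure: one empty-breadcrumb entry for the stream itself, then one ['properties', <field>] entry per column carrying inclusion and sql-datatype. The concrete values below are illustrative, not output captured from the tap.

# Illustrative annotated-schema metadata, shaped after the keys this test asserts on.
stream_metadata = [
    {'breadcrumb': [],
     'metadata': {'table-key-properties': ['id'],
                  'schema-name': 'public',
                  'database-name': 'discovery1',
                  'row-count': 500,
                  'is-view': False}},
    {'breadcrumb': ['properties', 'id'],
     'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer'}},
    {'breadcrumb': ['properties', 'our_varchar'],
     'metadata': {'inclusion': 'available', 'sql-datatype': 'character varying'}},
    {'breadcrumb': ['properties', 'invalid_xml'],
     'metadata': {'inclusion': 'unsupported', 'sql-datatype': 'xml'}},
]

# The same kind of extraction the test performs: group fields by inclusion.
automatic = {item['breadcrumb'][1] for item in stream_metadata
             if item['breadcrumb'] and item['metadata'].get('inclusion') == 'automatic'}
unsupported = {item['breadcrumb'][1] for item in stream_metadata
               if item['breadcrumb'] and item['metadata'].get('inclusion') == 'unsupported'}
assert automatic == {'id'}
assert unsupported == {'invalid_xml'}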
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
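The sdc_recursive_*_array entries in expected_schemas above are deliberately self-referencing: a value may be null, a scalar, or an array whose items satisfy the same definition again, which is how a column declared INTEGER[][] (or nested deeper, since Postgres does not enforce the declared dimensions) still validates. A plain-Python reading of the integer variant, for illustration only; this is not the tap's validation code:

def is_recursive_integer_array(value):
    """Mirror of the sdc_recursive_integer_array definition: null, integer, or
    an array whose items satisfy the same rule recursively."""
    if value is None or isinstance(value, int):
        return True
    if isinstance(value, list):
        return all(is_recursive_integer_array(item) for item in value)
    return False

assert is_recursive_integer_array([[1, 2, 3], [4, 5, 6]])   # 2-D, as in rec_1
assert is_recursive_integer_array(None)
assert not is_recursive_integer_array([['a']])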
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
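A note on the rec_1 fixture above: it passes Postgres array literals such as '{{1,2,3},{4,5,6}}' as strings and relies on insert_record to add the ::json[] casts. psycopg2 can also adapt Python lists to arrays directly; the sketch below shows that equivalent style for a few of the same columns (connection parameters are placeholders, and this is not code from the patch):

import psycopg2
import psycopg2.extras

conn = psycopg2.connect(host='localhost', dbname='dev',
                        user='postgres', password='postgres')
conn.autocommit = True
with conn.cursor() as cur:
    cur.execute(
        "INSERT INTO postgres_full_table_replication_array_test "
        "(our_int_array, our_text_array, our_json_array) "
        "VALUES (%s, %s, %s::json[])",
        ([[1, 2, 3], [4, 5, 6]],                    # nested list  -> integer[][]
         ['three string', 'four'],                  # list of str  -> text[]
         [psycopg2.extras.Json({'secret': 55})]))   # wrapped dict -> json[]
conn.close()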
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
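The expected_ts_tz helper defined above normalizes a localized timestamp to the UTC string the target is expected to emit. A worked example using the fixture value from this test (America/New_York is UTC-5 in February, so 02:02 becomes 07:02):

import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
our_ts_tz = nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))
our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc)
expected = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00")
assert expected == '1997-02-02T07:02:02.722184+00:00'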
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
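# [Editor's note] The 'our_store' column is written as an hstore literal on
# insert ('dances=>"floor",name=>"betty"') but expected back as a plain dict.
# A rough sketch of that mapping for these simple, unescaped literals; the
# parse_hstore_literal helper is illustrative and not part of db_utils.
def parse_hstore_literal(literal):
    """Parse a simple key=>"value" hstore literal into a dict (no escaping or NULLs handled)."""
    pairs = {}
    for item in literal.split(','):
        key, _, value = item.partition('=>')
        pairs[key.strip().strip('"')] = value.strip().strip('"')
    return pairs

assert parse_hstore_literal('dances=>"floor",name=>"betty"') == {'dances': 'floor', 'name': 'betty'}
assert parse_hstore_literal('size=>"small",name=>"betty"') == {'size': 'small', 'name': 'betty'}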
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
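# [Editor's note] A tiny recap of the expected_records index bookkeeping from
# the NB breakdown above, since a full-table sync simply omits deleted rows
# rather than emitting a delete message (indexes are illustrative only):
inserted_indexes = [0, 1, 2, 3, 4, 5]          # every record ever inserted
deleted_indexes = [1, 5]                       # deleted before sync 3, so never re-synced
remaining = [i for i in inserted_indexes if i not in deleted_indexes]
assert set(remaining) == {0, 2, 3, 4}          # same set as "Resulting Synced Records: 2, 3, 0, 4"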
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
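# [Editor's note] The expected_ts_tz helper above normalizes localized
# timestamps to UTC so the expectations track daylight-saving transitions
# instead of hard-coding a single offset. A standalone illustration (the
# February/July values here are examples, not fixtures from this test):
import datetime
import pytz

nyc = pytz.timezone('America/New_York')
winter = nyc.localize(datetime.datetime(1987, 2, 2, 2, 2, 2, 722184))   # EST, UTC-5
summer = nyc.localize(datetime.datetime(1987, 7, 2, 2, 2, 2, 722184))   # EDT, UTC-4

fmt = "%Y-%m-%dT%H:%M:%S.%f+00:00"
assert winter.astimezone(pytz.utc).strftime(fmt) == '1987-02-02T07:02:02.722184+00:00'
assert summer.astimezone(pytz.utc).strftime(fmt) == '1987-07-02T06:02:02.722184+00:00'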
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
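# [Editor's note] A conceptual sketch of the replication-key bookmarking that
# the assertions above (ascending order by replication key, replication_key_value
# equal to the last record's key) and the next sync below rely on. This is not
# the tap's actual query, just the shape of an incremental pick-up: the saved
# bookmark is applied inclusively (>=), which is why the previously bookmarked
# record is re-synced even when nothing has changed.
def incremental_sync(rows, replication_key, bookmark=None):
    """Return (synced_rows, new_bookmark) for a simplified incremental pass."""
    candidates = [row for row in rows if bookmark is None or row[replication_key] >= bookmark]
    candidates.sort(key=lambda row: row[replication_key])
    new_bookmark = candidates[-1][replication_key] if candidates else bookmark
    return candidates, new_bookmark

rows = [{'id': 1, 'ts': '1977-03-03'}, {'id': 2, 'ts': '1987-02-02'}, {'id': 3, 'ts': '1997-02-02'}]
synced, bookmark = incremental_sync(rows, 'ts')
assert [row['id'] for row in synced] == [1, 2, 3] and bookmark == '1997-02-02'
synced, bookmark = incremental_sync(rows, 'ts', bookmark=bookmark)   # no new data: only the bookmarked row
assert [row['id'] for row in synced] == [3]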
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True
-            with conn.cursor() as cur:
-                #insert another chicken
-                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
-                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
-
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-
-        # verify tap and target exit codes
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        record_count_by_stream = runner.examine_target_output_file(self,
-                                                                   conn_id,
-                                                                   self.expected_sync_streams(),
-                                                                   self.expected_pks())
-        self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1})
-
-        upserts = []
-        for u in runner.get_upserts_from_target_output():
-            self.assertIsNotNone(u.get('_sdc_lsn'))
-            del u['_sdc_lsn']
-            upserts.append(u)
-
-        self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'},
-                          {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}],
-                         upserts)
-
-        print("inserted record is correct")
-
-        state = menagerie.get_state(conn_id)
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-        cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
-        self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn")
-        lsn_cows_2 = cows_bookmark['lsn']
-        self.assertTrue(lsn_cows_2 >= lsn_cows_1)
-
-        chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens']
-        self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn")
-        lsn_chickens_2 = chickens_bookmark['lsn']
-        self.assertTrue(lsn_chickens_2 >= lsn_chickens_1)
-
-        #table_version does NOT change
-        self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version")
-
-        #table_version does NOT change
-        self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version")
-
-
-
-SCENARIOS.add(PostgresLogicalRepMultipleDBs)
-import os
-import unittest
-
-import psycopg2.extras
-from psycopg2.extensions import quote_ident
-from tap_tester.scenario import (SCENARIOS)
-import tap_tester.connections as connections
-import tap_tester.menagerie as menagerie
-import tap_tester.runner as runner
-
-import db_utils # pylint: disable=import-error
-
-
-expected_schemas = {'postgres_logical_replication_test_cows':
-                    {'type': 'object',
-                     'selected': True,
-                     'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'},
-                                    'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True},
-                                    'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}},
-
-                    'postgres_logical_replication_test_chickens':
-                    {'type': 'object',
-                     'selected': True,
-                     'properties': {'chicken_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'},
-                                    'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True},
-                                    'chicken_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}}
-
-
-def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version')
-        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert')
-        self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version')
-
-        # verify state and bookmarks
-        state = menagerie.get_state(conn_id)
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-
-        bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows']
-        self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn")
-        lsn_cows_1 = bookmark_cows['lsn']
-        self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version")
-
-        bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens']
-        self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn")
-        lsn_chickens_1 = bookmark_chickens['lsn']
-        self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version")
-
-
-        #----------------------------------------------------------------------
-        # invoke the sync job again after adding records
-        #----------------------------------------------------------------------
-        print("inserting 2 more cows and 2 more chickens")
-
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor() as cur:
-                # insert another cow
-                self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21}
-                insert_record(cur, test_table_name_cows, self.cows_rec_2)
-                # update that cow's expected values
-                self.cows_rec_2['id'] = 2
-                self.cows_rec_2['_sdc_deleted_at'] = None
-
-                # insert another chicken
-                self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14}
-                insert_record(cur, test_table_name_chickens, self.chicken_rec_2)
-                # update that chicken's expected values
-                self.chicken_rec_2['id'] = 2
-                self.chicken_rec_2['_sdc_deleted_at'] = None
-
-                # and repeat...
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
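A note on the timestamp expectations above: "OUR TS TZ" is inserted as an America/New_York value but expected back as '1993-03-03T08:03:03.333333+00:00', i.e. normalized to UTC. A quick sanity check of that conversion (illustrative only, using the same pytz objects the test builds):

    import datetime

    import pytz

    # 1993-03-03 03:03:03.333333 in New York (EST, UTC-5) is 08:03:03.333333 UTC,
    # which is exactly the ISO string the expected record asserts for "OUR TS TZ".
    nyc_tz = pytz.timezone('America/New_York')
    local_ts = nyc_tz.localize(datetime.datetime(1993, 3, 3, 3, 3, 3, 333333))
    print(local_ts.astimezone(pytz.utc).isoformat())  # 1993-03-03T08:03:03.333333+00:00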
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
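The UPDATE above deliberately writes 'NaN' and '+Infinity' into the numeric and float columns, and the expected record in the next block asserts None for our_decimal, our_real and our_double. That lines up with strict JSON having no encoding for those values, which plain Python shows directly (illustrative, not part of the patch):

    import json

    # With allow_nan=False (strict JSON) the encoder rejects NaN and Infinity outright,
    # so a record delivered to the target as JSON can only represent them as null.
    for value in (float('nan'), float('inf')):
        try:
            json.dumps(value, allow_nan=False)
        except ValueError:
            print('{} has no strict-JSON representation'.format(value))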
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
-
-        # verify that persisted streams have the correct properties
-        chicken_catalog = found_catalogs[0]
-
-        self.assertEqual('chicken_view', chicken_catalog['stream_name'])
-        print("discovered streams are correct")
-
-        print('checking discovered metadata for ROOT-CHICKEN_VIEW')
-        md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata']
-
-        self.assertEqual(
-            {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []},
-             ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True},
-             ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
-             ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True},
-             ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
-             ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}},
-            metadata.to_map(md))
-
-
-        # 'ID' selected as view-key-properties
-        replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}]
-
-        connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog,
-                                                           menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']),
-                                                           replication_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-
-        # verify tap and target exit codes
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        record_count_by_stream = runner.examine_target_output_file(self,
-                                                                   conn_id,
-                                                                   self.expected_sync_streams(),
-                                                                   self.expected_pks())
-
-
-        self.assertEqual(record_count_by_stream, { 'chicken_view': 1})
-        records_by_stream = runner.get_records_from_target_output()
-
-        table_version = records_by_stream['chicken_view']['table_version']
-        self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version')
-        self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert')
-        self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version')
-
-        # verifications about individual records
-        for stream, recs in records_by_stream.items():
-            # verify the persisted schema was correct
-            self.assertEqual(recs['schema'],
-                             expected_schemas[stream],
-                             msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))
-
-        actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data']
-
-        expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'}
-        self.assertEqual(actual_chicken_record,
-                         expected_chicken_record,
-                         msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record))
-
-        print("records are correct")
-
-        # verify state and bookmarks
-        state = menagerie.get_state(conn_id)
-
-        chicken_bookmark = state['bookmarks']['postgres-public-chicken_view']
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-        self.assertEqual(chicken_bookmark['version'], table_version,
-                         msg="expected bookmark for stream ROOT-CHICKEN to match version")
-
-
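The message-shape assertions above (activate_version, upsert, activate_version) are the generic full-table pattern this suite checks; a hypothetical helper, assuming the same message list structure, could express it for any number of rows:

    def assert_full_table_messages(test, messages):
        # Full-table syncs in these tests bracket the upserts with activate_version
        # messages; everything in between should be an upsert, one per row.
        test.assertEqual(messages[0]['action'], 'activate_version')
        test.assertEqual(messages[-1]['action'], 'activate_version')
        for message in messages[1:-1]:
            test.assertEqual(message['action'], 'upsert')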
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
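The subTest pattern referenced in the NB comment above lets unittest report every mismatched field in a single run instead of aborting at the first failed assertion, which is why the loops below compare value by value. A minimal standalone sketch of the pattern (the field names and values here are assumed for illustration, not taken from this patch):

    import unittest

    class FieldByFieldExample(unittest.TestCase):
        def test_fields(self):
            expected = {'id': 1, 'our_smallint': -32768}
            actual = {'id': 1, 'our_smallint': 0}
            for key in expected:
                # each failing field is reported separately; the loop continues past failures
                with self.subTest(field=key):
                    self.assertEqual(expected[key], actual.get(key, "MISSING FIELD"))

    if __name__ == '__main__':
        unittest.main()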
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
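For reference, the per-column metadata entries this test filters on have the general shape sketched below; actual_automatic_fields and actual_unsupported_fields are built by matching each breadcrumb against its 'inclusion' value. The concrete values are illustrative only, not taken from this patch:

    # a key property discovered by the tap
    {'breadcrumb': ['properties', 'id'],
     'metadata': {'sql-datatype': 'integer', 'selected-by-default': True, 'inclusion': 'automatic'}}

    # a column of an unsupported datatype
    {'breadcrumb': ['properties', 'invalid_xml'],
     'metadata': {'sql-datatype': 'xml', 'inclusion': 'unsupported'}}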
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
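# NB | The tap_stream_id values checked throughout these tests are assumed to
#      follow a "<database>-<schema>-<table>" naming convention, e.g. for this
#      test: 'dev' + 'public' + test_table_name ->
#      'dev-public-postgres_full_table_replication_array_test'
#      (assumed from the expected_check_streams() values, not asserted by the tap itself).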
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
-
- # verify discovery produced (at least) 1 expected catalog
- found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
- if found_catalog['tap_stream_id'] in self.expected_check_streams()]
- self.assertGreaterEqual(len(found_catalogs), 1)
-
- # verify the tap discovered the expected streams
- found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
- self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
- # verify that persisted streams have the correct properties
- test_catalog = found_catalogs[0]
- self.assertEqual(test_table_name, test_catalog['stream_name'])
- print("discovered streams are correct")
-
- # perform table selection
- print('selecting {} and all fields within the table'.format(test_table_name))
- schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
- additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
- _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
-
- # clear state
- menagerie.set_state(conn_id, {})
-
- # run sync job 1 and verify exit codes
- sync_job_name = runner.run_sync_mode(self, conn_id)
- exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
- menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
- # get records
- record_count_by_stream = runner.examine_target_output_file(
- self, conn_id, self.expected_sync_streams(), self.expected_pks()
- )
- records_by_stream = runner.get_records_from_target_output()
- table_version_1 = records_by_stream[test_table_name]['table_version']
- messages = records_by_stream[test_table_name]['messages']
-
- # verify the expected number of records were replicated
- self.assertEqual(3, record_count_by_stream[test_table_name])
-
- # verify the message actions match expectations
- self.assertEqual(5, len(messages))
- self.assertEqual('activate_version', messages[0]['action'])
- self.assertEqual('upsert', messages[1]['action'])
- self.assertEqual('upsert', messages[2]['action'])
- self.assertEqual('upsert', messages[3]['action'])
- self.assertEqual('activate_version', messages[4]['action'])
-
- # verify the persisted schema matches expectations
- self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
- # verify replicated records match expectations
- self.assertDictEqual(self.expected_records[0], messages[1]['data'])
- self.assertDictEqual(self.expected_records[1], messages[2]['data'])
- self.assertDictEqual(self.expected_records[2], messages[3]['data'])
-
- print("records are correct")
-
- # grab bookmarked state
- state = menagerie.get_state(conn_id)
- bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
-
- # verify state and bookmarks meet expectations
- self.assertIsNone(state['currently_syncing'])
- self.assertIsNone(bookmark.get('lsn'))
- self.assertIsNone(bookmark.get('replication_key'))
- self.assertIsNone(bookmark.get('replication_key_value'))
- self.assertEqual(table_version_1, bookmark['version'])
-
- #----------------------------------------------------------------------
- # invoke the sync job AGAIN and get the same 3 records
- #----------------------------------------------------------------------
-
- # run sync job 2 and verify exit codes
- sync_job_name = runner.run_sync_mode(self, conn_id)
- exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
- menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
- # get records
- record_count_by_stream = runner.examine_target_output_file(
- self, conn_id, self.expected_sync_streams(), self.expected_pks()
- )
- records_by_stream = runner.get_records_from_target_output()
- table_version_2 = records_by_stream[test_table_name]['table_version']
- messages = records_by_stream[test_table_name]['messages']
-
- # verify the expected number of records were replicated
- self.assertEqual(3, record_count_by_stream[test_table_name])
-
- # verify the message actions match expectations
- self.assertEqual(4, len(messages))
- self.assertEqual('upsert', messages[0]['action'])
- self.assertEqual('upsert', messages[1]['action'])
- self.assertEqual('upsert', messages[2]['action'])
- self.assertEqual('activate_version', messages[3]['action'])
-
- # verify the new table version increased on the second sync
- self.assertGreater(table_version_2, table_version_1)
-
- # verify the persisted schema still matches expectations
- self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
- # verify replicated records still match expectations
- self.assertDictEqual(self.expected_records[0], messages[0]['data'])
- self.assertDictEqual(self.expected_records[1], messages[1]['data'])
- self.assertDictEqual(self.expected_records[2], messages[2]['data'])
-
- # grab bookmarked state
- state = menagerie.get_state(conn_id)
- bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
-
- # verify state and bookmarks meet expectations
- self.assertIsNone(state['currently_syncing'])
- self.assertIsNone(bookmark.get('lsn'))
- self.assertIsNone(bookmark.get('replication_key'))
- self.assertIsNone(bookmark.get('replication_key_value'))
- self.assertEqual(table_version_2, bookmark['version'])
-
-
- #----------------------------------------------------------------------
- # invoke the sync job AGAIN following various manipulations to the data
- #----------------------------------------------------------------------
-
- with db_utils.get_test_connection('dev') as conn:
- conn.autocommit = True
- with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-
- # NB | We will perform the following actions prior to the next sync:
- # [Action (EXPECTED RESULT)]
-
- # Insert a record
- # Insert a record to be updated prior to sync
- # Insert a record to be deleted prior to sync (NOT REPLICATED)
-
- # Update an existing record
- # Update a newly inserted record
-
- # Delete an existing record
- # Delete a newly inserted record
-
- # inserting...
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
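# NB | db_utils.update_record and db_utils.delete_record are assumed (from
#      their usage below) to run a plain UPDATE ... WHERE id = <record_pk>
#      and DELETE ... WHERE id = <record_pk> against canon_table_name; the
#      corresponding expected_records entries are adjusted in lockstep so the
#      assertions after sync 3 still line up.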
- # an existing record
- canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
- record_pk = 1
- our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
- our_ts_tz = nyc_tz.localize(our_ts)
- updated_data = {
- "OUR TS TZ": our_ts_tz,
- "our_double": decimal.Decimal("6.6"),
- "our_money": "$0.00"
- }
- self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
- self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
- self.expected_records[0]["our_money"] = "$0.00"
-
- db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
- # a newly inserted record
- canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
- record_pk = 5
- our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
- our_ts_tz = nyc_tz.localize(our_ts)
- updated_data = {
- "OUR TS TZ": our_ts_tz,
- "our_double": decimal.Decimal("6.6"),
- "our_money": "$0.00"
- }
- self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
- self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
- self.expected_records[4]["our_money"] = "$0.00"
-
- db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
-
- # deleting
- # an existing record
- record_pk = 2
- db_utils.delete_record(cur, canon_table_name, record_pk)
-
- # a newly inserted record
- record_pk = 6
- db_utils.delete_record(cur, canon_table_name, record_pk)
-
- #----------------------------------------------------------------------
- # invoke the sync job AGAIN after various manipulations
- #----------------------------------------------------------------------
-
- # run sync job 3 and verify exit codes
- sync_job_name = runner.run_sync_mode(self, conn_id)
- exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
- menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
- # get records
- record_count_by_stream = runner.examine_target_output_file(
- self, conn_id, self.expected_sync_streams(), self.expected_pks()
- )
- records_by_stream = runner.get_records_from_target_output()
- table_version_3 = records_by_stream[test_table_name]['table_version']
- messages = records_by_stream[test_table_name]['messages']
-
- # verify the expected number of records were replicated
- self.assertEqual(4, record_count_by_stream[test_table_name])
-
- # verify the message actions match expectations
- self.assertEqual(5, len(messages))
- self.assertEqual('upsert', messages[0]['action'])
- self.assertEqual('upsert', messages[1]['action'])
- self.assertEqual('upsert', messages[2]['action'])
- self.assertEqual('upsert', messages[3]['action'])
- self.assertEqual('activate_version', messages[4]['action'])
-
- # verify the new table version increased on the third sync
- self.assertGreater(table_version_3, table_version_2)
-
- # verify the persisted schema still matches expectations
- self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-
- # NB | This is a little tough to track mentally so here's a breakdown of
- # the order of operations by expected records indexes:
-
- # Prior to Sync 1
- # insert 0, 1, 2
-
- # Prior to Sync 2
- # No db changes
-
- # Prior to Sync 3
- # insert 3, 4, 5
- # update 0, 4
- # delete 1, 5
-
- # Resulting Synced Records: 2, 3, 0, 4
-
-
- # verify replicated records still match expectations
- self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
- self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
-
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
-
- # verify discovery produced (at least) 1 expected catalog
- found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
- if found_catalog['tap_stream_id'] in self.expected_check_streams()]
- self.assertGreaterEqual(len(found_catalogs), 1)
-
- # verify the tap discovered the expected streams
- found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
- self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
- # verify that persisted streams have the correct properties
- test_catalog = found_catalogs[0]
- self.assertEqual(test_table_name, test_catalog['stream_name'])
- print("discovered streams are correct")
-
- # perform table selection
- print('selecting {} and all fields within the table'.format(test_table_name))
- schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
- additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
- _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
-
- # clear state
- menagerie.set_state(conn_id, {})
-
- # run sync job 1 and verify exit codes
- sync_job_name = runner.run_sync_mode(self, conn_id)
- exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
- menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
- # get records
- record_count_by_stream = runner.examine_target_output_file(
- self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
- )
- records_by_stream = runner.get_records_from_target_output()
- table_version = records_by_stream[test_table_name]['table_version']
- messages = records_by_stream[test_table_name]['messages']
-
- # verify the expected number of records were replicated
- self.assertEqual(3, record_count_by_stream[test_table_name])
-
- # verify the message actions match expectations
- self.assertEqual(4, len(messages))
- self.assertEqual('activate_version', messages[0]['action'])
- self.assertEqual('upsert', messages[1]['action'])
- self.assertEqual('upsert', messages[2]['action'])
- self.assertEqual('upsert', messages[3]['action'])
-
- # verify the persisted schema matches expectations
- self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
- # verify replicated records match expectations
- self.assertDictEqual(self.expected_records[0], messages[1]['data'])
- self.assertDictEqual(self.expected_records[1], messages[2]['data'])
- self.assertDictEqual(self.expected_records[2], messages[3]['data'])
-
- # verify records are in ascending order by replication-key value
- expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
- self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
- self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
-
- print("records are correct")
-
- # grab bookmarked state
- state = menagerie.get_state(conn_id)
- bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
-
- # verify state and bookmarks meet expectations
- self.assertIsNone(state['currently_syncing'])
- self.assertIsNone(bookmark.get('lsn'))
- self.assertEqual(table_version, bookmark['version'])
- self.assertEqual(expected_replication_key, bookmark['replication_key'])
- self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
-
-
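# NB | On the next sync the INCREMENTAL query is assumed to resume from this
#      bookmark, roughly:
#
#          SELECT * FROM "postgres_incremental_replication_test"
#          WHERE "OUR TS TZ" >= '<replication_key_value>' ORDER BY "OUR TS TZ"
#
#      (the exact operator and quoting are up to the tap), which is why the
#      lower-valued insert below is not expected to be replicated.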
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a higher replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #---------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a replication key lower than the previous bookmark - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations...
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that chicken's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat...
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
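As the assertions above show, a row deleted from the source surfaces in the target output as an ordinary 'upsert' message whose data still carries the primary key but has _sdc_deleted_at populated. A minimal sketch of such a message (the timestamp is a hypothetical placeholder, not taken from the test):

    deleted_row_message = {
        'action': 'upsert',
        'data': {
            'id': 4,                                           # primary key of the deleted row
            '_sdc_deleted_at': '2021-03-29T14:33:33.000000Z',  # hypothetical deletion timestamp
        },
    }
    assert deleted_row_message['action'] == 'upsert'
    assert deleted_row_message['data'].get('_sdc_deleted_at') is not None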
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
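Comparing the inserted values above with the expected record, the test pins down a handful of value coercions for LOG_BASED replication. A small summary sketch, with each entry holding the source value and the expected replicated value as copied from the fixtures above:

    coercions = {
        'our_store': ('jumps=>"high",name=>"betty"', {'name': 'betty', 'jumps': 'high'}),  # hstore -> dict
        'our_bit':   ('1', True),                                                          # bit(1) -> boolean
        'our_money': ('$412.1234', '$412.12'),                                             # money rounded to cents
        'our_inet':  ('192.168.102.128/32', '192.168.102.128'),                            # host /32 suffix dropped
    }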
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
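The UPDATE above deliberately writes special numeric values, and the expected record that follows encodes how they are replicated: NaN and +Infinity come back as null, while the money value is rounded to two decimal places. A small summary sketch, with each entry holding the value written by the UPDATE and the value expected back:

    special_value_handling = {
        'our_decimal': ('NaN',       None),
        'our_real':    ('+Infinity', None),
        'our_double':  ('NaN',       None),
        'our_money':   ('$56.811',   '$56.81'),
    }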
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
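# For reference, a minimal sketch of the state this full-table view sync is expected
# to leave behind, based only on the assertions above; the version value here is
# hypothetical and would in practice equal the table_version captured from the sync
# output.
expected_state_shape = {
    'currently_syncing': None,
    'bookmarks': {
        'postgres-public-chicken_view': {
            'version': 1612345678901,  # hypothetical; matched against table_version
        },
    },
}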
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
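For clarity, here is a minimal, self-contained sketch of the per-field subTest pattern the NB comment above refers to. The `expected` and `actual` dicts below are hypothetical stand-ins for `self.expected_records[n]` and `messages[n]['data']`; only the iteration pattern itself is the point.

import unittest

class SubTestPatternExample(unittest.TestCase):
    def test_field_by_field(self):
        # hypothetical record data; real records contain huge padded CHAR values
        expected = {'id': 1, 'our_char_big': 'a' + ' ' * 9}
        actual   = {'id': 1, 'our_char_big': 'a' + ' ' * 9}
        for key, expected_value in expected.items():
            # each mismatched field is reported on its own instead of one giant dict diff
            with self.subTest(field=key):
                self.assertEqual(expected_value, actual.get(key, "MISSING FIELD"))

if __name__ == '__main__':
    unittest.main()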
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
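As a minimal sketch of what the assertions above are working with, the snippet below pulls inclusion values out of a hypothetical annotated-schema metadata list shaped like the one returned for this stream; the three property entries are illustrative only.

stream_metadata = [
    {"breadcrumb": [], "metadata": {"table-key-properties": ["id"], "row-count": 500}},
    {"breadcrumb": ["properties", "id"], "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
    {"breadcrumb": ["properties", "invalid_xml"], "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}},
    {"breadcrumb": ["properties", "our_text"], "metadata": {"inclusion": "available", "sql-datatype": "text"}},
]

# field-level entries have a two-element breadcrumb; the top-level entry has an empty one
automatic_fields = {
    item["breadcrumb"][1]
    for item in stream_metadata
    if item["breadcrumb"] and item["metadata"].get("inclusion") == "automatic"
}
unsupported_fields = {
    item["breadcrumb"][1]
    for item in stream_metadata
    if item["breadcrumb"] and item["metadata"].get("inclusion") == "unsupported"
}

assert automatic_fields == {"id"}            # only primary keys come back automatic here
assert unsupported_fields == {"invalid_xml"}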
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
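Aside: the discovery assertions that follow all hinge on the tap_stream_id naming convention. The {database}-{schema}-{table} shape shown below is inferred from expected_check_streams(), not from tap internals; a minimal sketch using only names defined at the top of this module, illustrative and not part of the test run:

    # tap_stream_id is expected to follow {database}-{schema}-{table}
    dbname = 'dev'  # the database ensured in setUp via db_utils.ensure_db('dev')
    stream_id = '{}-{}-{}'.format(dbname, test_schema_name, test_table_name)
    assert stream_id == 'dev-public-postgres_incremental_replication_test'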
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
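Aside: two details make the ordering and bookmark assertions above work on plain string comparison. The expected_ts_tz() helper always renders values as zero-padded UTC ISO-8601 strings, and the bookmark checked above is simply the replication-key value of the greatest upserted record (expected_records[2] after sync 1). A small standalone sketch of the string behaviour, illustrative only and duplicating the class helper so it runs on its own:

    import datetime
    import pytz

    def expected_ts_tz(ts_tz):
        # same normalization as the class helper: convert to UTC, render with an explicit +00:00
        return datetime.datetime.strftime(ts_tz.astimezone(pytz.utc), "%Y-%m-%dT%H:%M:%S.%f+00:00")

    nyc_tz = pytz.timezone('America/New_York')
    older = expected_ts_tz(nyc_tz.localize(datetime.datetime(1987, 2, 2, 2, 2, 2, 722184)))
    newer = expected_ts_tz(nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)))

    # zero-padded ISO-8601 strings sort chronologically, so assertLess and max() behave as intended
    assert older < newer
    assert newer == '1997-02-02T07:02:02.722184+00:00'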
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
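Aside: the record-level checks below all follow from one selection rule, which the assertions imply rather than state: a key-based INCREMENTAL sync re-emits only rows whose replication-key value is greater than or equal to the previous bookmark (inclusive, which is why the previously bookmarked record reappears), and it sees nothing at all for hard deletes. A rough model of that rule, an assumption for illustration rather than the tap's actual query:

    def rows_incremental_sync_emits(rows, replication_key, bookmark_value):
        # rough model only: keep rows at or above the bookmark, ordered by the replication key;
        # rows updated to a lower key value, or deleted outright, never show up again
        selected = [row for row in rows
                    if bookmark_value is None or row[replication_key] >= bookmark_value]
        return sorted(selected, key=lambda row: row[replication_key])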
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
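Aside: a hard DELETE is invisible to key-based incremental replication (this stream's schema has no _sdc_deleted_at column), so the only row the third sync can emit is the one sitting at the previous bookmark. How deletes surface differs by replication method; a rough summary inferred from the streams exercised in this file rather than from documented tap behaviour:

    delete_visibility = {
        'FULL_TABLE': 'deleted row is simply absent from the next full resync',
        'INCREMENTAL': 'delete produces no row at or above the bookmark, so it is never observed',
        'LOG_BASED': 'delete arrives as a record with _sdc_deleted_at set (see the array test schema below)',
    }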
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'chicken_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'chicken_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that chicken's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
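The expected values for "OUR TS" and "OUR TS TZ" above ('1993-03-03T03:03:03.333333+00:00' and '1993-03-03T08:03:03.333333+00:00') can be derived directly from the inserted datetimes. This is only a sketch of that derivation, not the tap-tester helpers themselves:

import datetime
import pytz

our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333)
our_ts_tz = pytz.timezone('America/New_York').localize(our_ts)

# Naive TIMESTAMP: emitted unchanged, with a UTC offset appended.
assert our_ts.isoformat() + '+00:00' == '1993-03-03T03:03:03.333333+00:00'

# TIMESTAMP WITH TIME ZONE: normalized to UTC (EST is UTC-5 on this date) before formatting.
assert our_ts_tz.astimezone(pytz.utc).isoformat() == '1993-03-03T08:03:03.333333+00:00'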
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
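The UPDATE above deliberately writes values that have no strict-JSON representation (NaN and +Infinity) plus a money value with sub-cent precision, which is presumably why the expected record that follows carries None for our_decimal, our_real, and our_double and '$56.81' for our_money. A small standard-library illustration of the JSON side of that, not the tap's own serialization code:

import json

# Strict JSON has no encoding for NaN or Infinity, so the test expects null (None)
# for these columns rather than a number.
try:
    json.dumps({"our_real": float("+inf")}, allow_nan=False)
except ValueError as err:
    print("not representable in strict JSON:", err)

# The money column typically keeps two fractional digits, so '$56.811' rounds to $56.81.
print(round(56.811, 2))  # 56.81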
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
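The metadata assertion in the test above goes through singer's metadata.to_map, which converts the annotated-schema metadata list into a dict keyed by breadcrumb tuples. A minimal sketch of that shape (the sample entries are illustrative, not the real discovered catalog):

from singer import metadata

raw_md = [
    {'breadcrumb': [], 'metadata': {'is-view': True, 'schema-name': 'public'}},
    {'breadcrumb': ['properties', 'id'], 'metadata': {'inclusion': 'available'}},
]

md_map = metadata.to_map(raw_md)
# Stream-level metadata lives under the empty breadcrumb; per-column
# metadata under ('properties', <column name>).
assert md_map[()] == {'is-view': True, 'schema-name': 'public'}
assert md_map[('properties', 'id')] == {'inclusion': 'available'}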
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
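The insert_record helper defined above builds a parameterized INSERT: it sorts the column names, joins them into the column list, and lets psycopg2 bind the values through %s placeholders. A sketch of the SQL it produces for a simple record (the table name is shown pre-quoted here only because quote_ident needs a live cursor):

data = {'name': 'fred', 'size': 'big'}
our_keys = sorted(data.keys())
our_values = [data[key] for key in our_keys]

columns_sql = ", \n".join(our_keys)
value_sql = ",".join(["%s"] * len(our_keys))

insert_sql = """ INSERT INTO {}
                ( {} )
                VALUES ( {} )""".format('"postgres_views_full_table_replication_test"',
                                        columns_sql, value_sql)

# cursor.execute(insert_sql, our_values) would bind ['fred', 'big'] to the placeholders.
print(insert_sql)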
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
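A short aside on the message-count check a few lines up: a FULL_TABLE sync of N records is expected to emit N + 2 messages, with an activate_version message on each side of the upserts. A small illustration of that expected shape (here N mirrors the two fixture records above):

    n_records = 2
    expected_actions = (['activate_version']
                        + ['upsert'] * n_records
                        + ['activate_version'])

    assert len(expected_actions) == n_records + 2
    assert expected_actions[0] == expected_actions[-1] == 'activate_version'
    assert set(expected_actions[1:-1]) == {'upsert'}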
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
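One non-obvious dependency in the row-count assertion below: the fixture runs ANALYZE after inserting its 500 rows, which suggests the 'row-count' metadata is read from the planner's statistics rather than an exact COUNT(*). A minimal sketch of what ANALYZE makes visible, assuming a live cursor `cur` on the discovery1 database (the query here is illustrative, not the tap's own discovery query):

    cur.execute("ANALYZE public.postgres_discovery_test")
    cur.execute("""
        SELECT reltuples::bigint
          FROM pg_class
         WHERE oid = 'public.postgres_discovery_test'::regclass
    """)
    # After ANALYZE, the planner's estimate for a 500-row table is exact.
    assert cur.fetchone()[0] == 500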
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name)
-
-        # verify discovery produced (at least) 1 expected catalog
-        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
-                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
-        self.assertGreaterEqual(len(found_catalogs), 1)
-
-        # verify the tap discovered the expected streams
-        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
-        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
-        # verify that persisted streams have the correct properties
-        test_catalog = found_catalogs[0]
-        self.assertEqual(test_table_name, test_catalog['stream_name'])
-        print("discovered streams are correct")
-
-        # perform table selection
-        print('selecting {} and all fields within the table'.format(test_table_name))
-        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
-        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}]
-        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        # run sync job 1 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_1 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(3, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(5, len(messages))
-        self.assertEqual('activate_version', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('upsert', messages[3]['action'])
-        self.assertEqual('activate_version', messages[4]['action'])
-
-        # verify the persisted schema matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-        # verify replicated records match expectations
-        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
-        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
-        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
-
-        print("records are correct")
-
-        # grab bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
-
-        # verify state and bookmarks meet expectations
-        self.assertIsNone(state['currently_syncing'])
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertIsNone(bookmark.get('replication_key'))
-        self.assertIsNone(bookmark.get('replication_key_value'))
-        self.assertEqual(table_version_1, bookmark['version'])
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN and get the same 3 records
-        #----------------------------------------------------------------------
-
-        # run sync job 2 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_2 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(3, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(4, len(messages))
-        self.assertEqual('upsert', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('activate_version', messages[3]['action'])
-
-        # verify the new table version increased on the second sync
-        self.assertGreater(table_version_2, table_version_1)
-
-        # verify the persisted schema still matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-        # verify replicated records still match expectations
-        self.assertDictEqual(self.expected_records[0], messages[0]['data'])
-        self.assertDictEqual(self.expected_records[1], messages[1]['data'])
-        self.assertDictEqual(self.expected_records[2], messages[2]['data'])
-
-        # grab bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test']
-
-        # verify state and bookmarks meet expectations
-        self.assertIsNone(state['currently_syncing'])
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertIsNone(bookmark.get('replication_key'))
-        self.assertIsNone(bookmark.get('replication_key_value'))
-        self.assertEqual(table_version_2, bookmark['version'])
-
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN following various manipulations to the data
-        #----------------------------------------------------------------------
-
-        with db_utils.get_test_connection('dev') as conn:
-            conn.autocommit = True
-            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-
-                # NB | We will perform the following actions prior to the next sync:
-                #     [Action (EXPECTED RESULT)]
-
-                #     Insert a record
-                #     Insert a record to be updated prior to sync
-                #     Insert a record to be deleted prior to sync (NOT REPLICATED)
-
-                #     Update an existing record
-                #     Update a newly inserted record
-
-                #     Delete an existing record
-                #     Delete a newly inserted record
-
-                # inserting...
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
-                # an existing record
-                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
-                record_pk = 1
-                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
-                our_ts_tz = nyc_tz.localize(our_ts)
-                updated_data = {
-                    "OUR TS TZ": our_ts_tz,
-                    "our_double": decimal.Decimal("6.6"),
-                    "our_money": "$0.00"
-                }
-                self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
-                self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
-                self.expected_records[0]["our_money"] = "$0.00"
-
-                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
-                # a newly inserted record
-                canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
-                record_pk = 5
-                our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
-                our_ts_tz = nyc_tz.localize(our_ts)
-                updated_data = {
-                    "OUR TS TZ": our_ts_tz,
-                    "our_double": decimal.Decimal("6.6"),
-                    "our_money": "$0.00"
-                }
-                self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
-                self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
-                self.expected_records[4]["our_money"] = "$0.00"
-
-                db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
-
-                # deleting
-                # an existing record
-                record_pk = 2
-                db_utils.delete_record(cur, canon_table_name, record_pk)
-
-                # a newly inserted record
-                record_pk = 6
-                db_utils.delete_record(cur, canon_table_name, record_pk)
-
-        #----------------------------------------------------------------------
-        # invoke the sync job AGAIN after various manipulations
-        #----------------------------------------------------------------------
-
-        # run sync job 3 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_pks()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version_3 = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(4, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(5, len(messages))
-        self.assertEqual('upsert', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('upsert', messages[3]['action'])
-        self.assertEqual('activate_version', messages[4]['action'])
-
-        # verify the new table version increased on the second sync
-        self.assertGreater(table_version_3, table_version_2)
-
-        # verify the persisted schema still matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-
-        # NB | This is a little tough to track mentally so here's a breakdown of
-        #      the order of operations by expected records indexes:
-
-        #      Prior to Sync 1
-        #          insert 0, 1, 2
-
-        #      Prior to Sync 2
-        #          No db changes
-
-        #      Prior to Sync 3
-        #          insert 3, 4, 5
-        #          update 0, 4
-        #          delete 1, 5
-
-        #      Resulting Synced Records: 2, 3, 0, 4
-
-
-        # verify replicated records still match expectations
-        self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
-        self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
-        
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
-
-        # verify discovery produced (at least) 1 expected catalog
-        found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
-                          if found_catalog['tap_stream_id'] in self.expected_check_streams()]
-        self.assertGreaterEqual(len(found_catalogs), 1)
-
-        # verify the tap discovered the expected streams
-        found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
-        self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
-        # verify that persisted streams have the correct properties
-        test_catalog = found_catalogs[0]
-        self.assertEqual(test_table_name, test_catalog['stream_name'])
-        print("discovered streams are correct")
-
-        # perform table selection
-        print('selecting {} and all fields within the table'.format(test_table_name))
-        schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
-        additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
-        _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        # run sync job 1 and verify exit codes
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        # get records
-        record_count_by_stream = runner.examine_target_output_file(
-            self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
-        )
-        records_by_stream = runner.get_records_from_target_output()
-        table_version = records_by_stream[test_table_name]['table_version']
-        messages = records_by_stream[test_table_name]['messages']
-
-        # verify the expected number of records were replicated
-        self.assertEqual(3, record_count_by_stream[test_table_name])
-
-        # verify the message actions match expectations
-        self.assertEqual(4, len(messages))
-        self.assertEqual('activate_version', messages[0]['action'])
-        self.assertEqual('upsert', messages[1]['action'])
-        self.assertEqual('upsert', messages[2]['action'])
-        self.assertEqual('upsert', messages[3]['action'])
-
-        # verify the persisted schema matches expectations
-        self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-        # verify replicated records match expectations
-        self.assertDictEqual(self.expected_records[0], messages[1]['data'])
-        self.assertDictEqual(self.expected_records[1], messages[2]['data'])
-        self.assertDictEqual(self.expected_records[2], messages[3]['data'])
-
-        # verify records are in ascending order by replication-key value
-        expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
-        self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
-        self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
-
-        print("records are correct")
-
-        # grab bookmarked state
-        state = menagerie.get_state(conn_id)
-        bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
-
-        # verify state and bookmarks meet expectations
-        self.assertIsNone(state['currently_syncing'])
-        self.assertIsNone(bookmark.get('lsn'))
-        self.assertEqual(table_version, bookmark['version'])
-        self.assertEqual(expected_replication_key, bookmark['replication_key'])
-        self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
-
-        
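For context on the bookmark checks above and the second sync that follows: incremental replication compares each row's replication-key column ("OUR TS TZ") against the saved bookmark and emits the qualifying rows in ascending key order, then advances the bookmark to the highest value seen. A minimal sketch of that selection rule, assuming an inclusive comparison (consistent with the record counts this test asserts) and using illustrative names only, not the actual tap-postgres code:

    # Sketch only: rows at or above the bookmarked replication-key value are
    # re-emitted in ascending order; rows updated to a lower key value drop
    # out of the next sync.
    def rows_to_replicate(rows, bookmark_value, replication_key="OUR TS TZ"):
        selected = [row for row in rows
                    if bookmark_value is None or row[replication_key] >= bookmark_value]
        return sorted(selected, key=lambda row: row[replication_key])

Under that rule the previously bookmarked record is picked up again on the next run, which is why the second sync below expects the bookmarked row plus only the inserts and updates whose key value landed above the old bookmark.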
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
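# A minimal sketch (hypothetical helper, not part of the original patch): the
# bookmark assertions repeated after every sync above follow one pattern -- the
# stream's LSN must be non-decreasing across syncs while table_version stays
# fixed. Names below are illustrative only.
def assert_bookmark_progressed(test, state, stream_id, prev_lsn, expected_version):
    bookmark = state['bookmarks'][stream_id]
    test.assertIsNone(state['currently_syncing'])
    test.assertIsNotNone(bookmark['lsn'], msg="expected bookmark to have an lsn")
    test.assertGreaterEqual(bookmark['lsn'], prev_lsn)
    test.assertEqual(bookmark['version'], expected_version)
    return bookmark['lsn']
# Hypothetical usage mirroring the checks above:
#   lsn_5 = assert_bookmark_progressed(
#       self, state, 'dev-public-postgres_logical_replication_test', lsn_4, table_version)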
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
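# A minimal sketch (hypothetical helper, not part of the original tests) of the
# record comparison used just above and throughout these tests: first verify the
# key sets match, then compare field by field so a failure names the offending column.
def assert_record_equal(test, actual, expected):
    test.assertEqual(set(actual.keys()), set(expected.keys()),
                     msg="unexpected keys: {}".format(
                         set(actual.keys()).symmetric_difference(set(expected.keys()))))
    for key, value in actual.items():
        test.assertEqual(value, expected[key],
                         msg="{} != {} for key {}".format(value, expected[key], key))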
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
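# A minimal sketch (an assumption drawn from the expected record below, not part
# of the original tests): the UPDATE writes 'NaN' / '+Infinity' into our_decimal,
# our_real and our_double, which surface as null in the replicated record, and
# our_money is rounded to two decimal places ('$56.811' -> '$56.81').
def normalize_special_numeric(value):
    # Postgres special numeric literals are expected back as null.
    if value in ('NaN', '+Infinity', '-Infinity'):
        return None
    return value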
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
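The view full-table test above leans on the Singer message-ordering contract: a FULL_TABLE sync is expected to open with an activate_version message, emit one upsert per row, and close with a final activate_version. The sketch below is only an illustration of that check under stated assumptions; the helper name assert_full_table_message_pattern and the shape of the messages list are hypothetical and are not part of tap-tester or this patch.

import unittest

def assert_full_table_message_pattern(test_case: unittest.TestCase, messages: list) -> None:
    # Illustrative helper (assumed, not from the patch): verify a FULL_TABLE
    # sync opens and closes with activate_version and emits only upserts
    # in between, mirroring the inline assertions in the test above.
    test_case.assertGreaterEqual(len(messages), 2)
    test_case.assertEqual('activate_version', messages[0]['action'])
    test_case.assertEqual('activate_version', messages[-1]['action'])
    for message in messages[1:-1]:
        test_case.assertEqual('upsert', message['action'])

In the test above the same checks are written inline against records_by_stream['chicken_view']['messages'][0..2], since exactly one row is expected.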
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
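The note above is why the following checks iterate field by field: assertDictEqual would dump the entire record on failure, whereas subTest reports each mismatched field separately and keeps checking the rest. A minimal, self-contained sketch of the pattern, using made-up record values rather than the real fixture data:

    import unittest


    class FieldByFieldComparison(unittest.TestCase):
        """Each field gets its own pass/fail entry; a sentinel flags missing columns."""

        def test_record_fields(self):
            expected = {'id': 1, 'our_text': 'some text', 'our_bigint': 1000000}
            actual = {'id': 1, 'our_text': 'some text', 'our_bigint': 1000000}

            for key, expected_value in expected.items():
                with self.subTest(field=key):
                    # .get() with a sentinel makes a missing column fail loudly
                    # instead of raising KeyError and aborting the whole test.
                    self.assertEqual(expected_value, actual.get(key, 'MISSING FIELD'))


    if __name__ == '__main__':
        unittest.main()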
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
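The set arithmetic above leans on the breadcrumb layout of the annotated schema: one empty-breadcrumb entry carries the stream-level properties, and each ['properties', <field>] entry carries an 'inclusion' marker. A small sketch of that reduction over a hand-written metadata list (the breadcrumbs and inclusion values below are illustrative, not a real catalog):

    # Hand-written metadata in the breadcrumb shape assumed by the assertions above;
    # a real catalog would come from menagerie.get_annotated_schema(...)['metadata'].
    stream_metadata = [
        {'breadcrumb': [], 'metadata': {'table-key-properties': ['id'], 'row-count': 500}},
        {'breadcrumb': ['properties', 'id'], 'metadata': {'inclusion': 'automatic'}},
        {'breadcrumb': ['properties', 'our_text'], 'metadata': {'inclusion': 'available'}},
        {'breadcrumb': ['properties', 'invalid_xml'], 'metadata': {'inclusion': 'unsupported'}},
    ]

    top_level = [item for item in stream_metadata if item.get('breadcrumb') == []]
    automatic = {item['breadcrumb'][1] for item in stream_metadata
                 if item['breadcrumb'] and item['metadata'].get('inclusion') == 'automatic'}
    unsupported = {item['breadcrumb'][1] for item in stream_metadata
                   if item['breadcrumb'] and item['metadata'].get('inclusion') == 'unsupported'}

    assert len(top_level) == 1
    assert automatic == {'id'}            # primary keys only, per the note above
    assert unsupported == {'invalid_xml'}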
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
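For context on how the array fixture rows get loaded: the module-level insert_record helper above sorts the column names, emits one %s placeholder per column, and appends explicit ::json[] / ::jsonb[] casts for the JSON array columns so psycopg2 can adapt those values. A sketch of the SQL it would produce, with an illustrative table and column set rather than the real fixture:

    def build_insert_sql(table_name, data):
        # Mirrors the placeholder/cast logic of insert_record above (sketch only).
        keys = sorted(data.keys())
        placeholders = []
        for key in keys:
            if key == 'our_json_array':
                placeholders.append('%s::json[]')
            elif key == 'our_jsonb_array':
                placeholders.append('%s::jsonb[]')
            else:
                placeholders.append('%s')
        return 'INSERT INTO {} ({}) VALUES ({})'.format(
            table_name, ', '.join(keys), ', '.join(placeholders))

    print(build_insert_sql('example_table', {'our_int_array': None, 'our_json_array': None}))
    # INSERT INTO example_table (our_int_array, our_json_array) VALUES (%s, %s::json[])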
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
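The expected_ts and expected_ts_tz helpers above capture how timestamp values are expected to come back from the target: zone-aware values are shifted to UTC, and both variants are rendered as ISO 8601 strings with an explicit +00:00 offset. A worked example with the same New York fixture timestamp used in record 1:

    import datetime

    import pytz

    nyc_tz = pytz.timezone('America/New_York')
    our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
    our_ts_tz = nyc_tz.localize(our_ts)

    # naive timestamp: formatted as-is and labelled UTC
    expected_ts = datetime.datetime.strftime(our_ts, '%Y-%m-%dT%H:%M:%S.%f+00:00')

    # zone-aware timestamp: converted to UTC first (EST is UTC-5 on this date)
    expected_ts_tz = datetime.datetime.strftime(
        our_ts_tz.astimezone(pytz.utc), '%Y-%m-%dT%H:%M:%S.%f+00:00')

    assert expected_ts == '1997-02-02T02:02:02.722184+00:00'
    assert expected_ts_tz == '1997-02-02T07:02:02.722184+00:00'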
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record
- canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
- record_pk = 1
- our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
- our_ts_tz = nyc_tz.localize(our_ts)
- updated_data = {
- "OUR TS TZ": our_ts_tz,
- "our_double": decimal.Decimal("6.6"),
- "our_money": "$0.00"
- }
- self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
- self.expected_records[0]["our_double"] = decimal.Decimal("6.6")
- self.expected_records[0]["our_money"] = "$0.00"
-
- db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
- # a newly inserted record
- canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)
- record_pk = 5
- our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)
- our_ts_tz = nyc_tz.localize(our_ts)
- updated_data = {
- "OUR TS TZ": our_ts_tz,
- "our_double": decimal.Decimal("6.6"),
- "our_money": "$0.00"
- }
- self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz)
- self.expected_records[4]["our_double"] = decimal.Decimal("6.6")
- self.expected_records[4]["our_money"] = "$0.00"
-
- db_utils.update_record(cur, canon_table_name, record_pk, updated_data)
-
-
- # deleting
- # an existing record
- record_pk = 2
- db_utils.delete_record(cur, canon_table_name, record_pk)
-
- # a newly inserted record
- record_pk = 6
- db_utils.delete_record(cur, canon_table_name, record_pk)
-
- #----------------------------------------------------------------------
- # invoke the sync job AGAIN after various manipulations
- #----------------------------------------------------------------------
-
- # run sync job 3 and verify exit codes
- sync_job_name = runner.run_sync_mode(self, conn_id)
- exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
- menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
- # get records
- record_count_by_stream = runner.examine_target_output_file(
- self, conn_id, self.expected_sync_streams(), self.expected_pks()
- )
- records_by_stream = runner.get_records_from_target_output()
- table_version_3 = records_by_stream[test_table_name]['table_version']
- messages = records_by_stream[test_table_name]['messages']
-
- # verify the expected number of records were replicated
- self.assertEqual(4, record_count_by_stream[test_table_name])
-
- # verify the message actions match expectations
- self.assertEqual(5, len(messages))
- self.assertEqual('upsert', messages[0]['action'])
- self.assertEqual('upsert', messages[1]['action'])
- self.assertEqual('upsert', messages[2]['action'])
- self.assertEqual('upsert', messages[3]['action'])
- self.assertEqual('activate_version', messages[4]['action'])
-
- # verify the new table version increased on the second sync
- self.assertGreater(table_version_3, table_version_2)
-
- # verify the persisted schema still matches expectations
- self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
-
- # NB | This is a little tough to track mentally so here's a breakdown of
- # the order of operations by expected records indexes:
-
- # Prior to Sync 1
- # insert 0, 1, 2
-
- # Prior to Sync 2
- # No db changes
-
- # Prior to Sync 3
- # insert 3, 4, 5
- # update 0, 4
- # delete 1, 5
-
- # Resulting Synced Records: 2, 3, 0, 4
-
-
- # verify replicated records still match expectations
- self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert
- self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert
-
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
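For reference, a minimal standalone sketch (not part of the patch) of what the expected_ts / expected_ts_tz helpers above compute: a TIMESTAMP WITHOUT TIME ZONE value is formatted as-is with a "+00:00" suffix, while a TIMESTAMP WITH TIME ZONE value is first converted to UTC. The sample value mirrors record 1 of this test.

import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184)   # naive timestamp (record 1)
our_ts_tz = nyc_tz.localize(our_ts)                        # timezone-aware copy

# TIMESTAMP WITHOUT TIME ZONE: formatted directly with a +00:00 suffix
expected_ts = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00")

# TIMESTAMP WITH TIME ZONE: converted to UTC before formatting
expected_ts_tz = datetime.datetime.strftime(our_ts_tz.astimezone(pytz.utc),
                                            "%Y-%m-%dT%H:%M:%S.%f+00:00")

print(expected_ts)     # 1977-03-03T03:03:03.733184+00:00
print(expected_ts_tz)  # 1977-03-03T08:03:03.733184+00:00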
-
- # verify discovery produced (at least) 1 expected catalog
- found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id)
- if found_catalog['tap_stream_id'] in self.expected_check_streams()]
- self.assertGreaterEqual(len(found_catalogs), 1)
-
- # verify the tap discovered the expected streams
- found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs}
- self.assertSetEqual(self.expected_check_streams(), found_catalog_names)
-
- # verify that persisted streams have the correct properties
- test_catalog = found_catalogs[0]
- self.assertEqual(test_table_name, test_catalog['stream_name'])
- print("discovered streams are correct")
-
- # perform table selection
- print('selecting {} and all fields within the table'.format(test_table_name))
- schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id'])
- additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}]
- _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md)
-
- # clear state
- menagerie.set_state(conn_id, {})
-
- # run sync job 1 and verify exit codes
- sync_job_name = runner.run_sync_mode(self, conn_id)
- exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
- menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
- # get records
- record_count_by_stream = runner.examine_target_output_file(
- self, conn_id, self.expected_sync_streams(), self.expected_primary_keys()
- )
- records_by_stream = runner.get_records_from_target_output()
- table_version = records_by_stream[test_table_name]['table_version']
- messages = records_by_stream[test_table_name]['messages']
-
- # verify the expected number of records were replicated
- self.assertEqual(3, record_count_by_stream[test_table_name])
-
- # verify the message actions match expectations
- self.assertEqual(4, len(messages))
- self.assertEqual('activate_version', messages[0]['action'])
- self.assertEqual('upsert', messages[1]['action'])
- self.assertEqual('upsert', messages[2]['action'])
- self.assertEqual('upsert', messages[3]['action'])
-
- # verify the persisted schema matches expectations
- self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema'])
-
- # verify replicated records match expectations
- self.assertDictEqual(self.expected_records[0], messages[1]['data'])
- self.assertDictEqual(self.expected_records[1], messages[2]['data'])
- self.assertDictEqual(self.expected_records[2], messages[3]['data'])
-
- # verify records are in ascending order by replication-key value
- expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0]
- self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key])
- self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key])
-
- print("records are correct")
-
- # grab bookmarked state
- state = menagerie.get_state(conn_id)
- bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test']
-
- # verify state and bookmarks meet expectations
- self.assertIsNone(state['currently_syncing'])
- self.assertIsNone(bookmark.get('lsn'))
- self.assertEqual(table_version, bookmark['version'])
- self.assertEqual(expected_replication_key, bookmark['replication_key'])
- self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value'])
-
-
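To make the bookmark assertions above concrete, here is a rough illustration of the state payload returned by menagerie.get_state after the first incremental sync. The version number is a hypothetical placeholder; the replication-key value is record 3's "OUR TS TZ" converted to UTC, as asserted above.

# Illustrative only -- the real payload comes from menagerie.get_state(conn_id).
example_state = {
    'currently_syncing': None,
    'bookmarks': {
        'dev-public-postgres_incremental_replication_test': {
            'version': 1600000000000,  # hypothetical table_version
            'replication_key': 'OUR TS TZ',
            'replication_key_value': '1997-02-02T07:02:02.722184+00:00',
            # no 'lsn' entry is expected for INCREMENTAL replication
        }
    }
}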
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
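Conceptually, an incremental sync only re-selects rows whose replication-key value is greater than or equal to the bookmark saved by the previous sync, which is why only some of the changes made above surface in sync 2. A sketch of that rule follows; the SQL string is illustrative, not the tap's literal query.

# Bookmark after sync 1: record 3's "OUR TS TZ" value.
# Rows qualify for sync 2 only if their replication-key value >= that bookmark:
#   id 1 (updated to 2021)            -> replicated
#   id 2 (updated to 1990)            -> NOT replicated (below the bookmark)
#   id 3 (unchanged, equals bookmark) -> replicated again (inclusive comparison)
#   id 4 (inserted at 1996)           -> NOT replicated (below the bookmark)
#   id 5 (inserted, then deleted)     -> NOT replicated (row no longer exists)
#   id 6 (inserted at 2111)           -> replicated
illustrative_query = '''
    SELECT * FROM "public"."postgres_incremental_replication_test"
    WHERE "OUR TS TZ" >= %(replication_key_value)s
    ORDER BY "OUR TS TZ" ASC
'''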
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
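The single upsert in this third sync follows from the same rule: the hard delete of id 1 emits nothing under key-based incremental replication, and only the row whose replication-key value is greater than or equal to the saved bookmark (id 6, which equals it) is re-selected. A tiny standalone sketch, with replication-key values truncated to dates for readability:

# Illustrative only: placeholder key values ordered like the test data.
bookmark = '2111-01-01'          # record 6's "OUR TS TZ", bookmarked after sync 2
remaining_rows = {               # rows still present in the table before sync 3
    2: '1990-04-04',             # updated below the bookmark
    3: '1997-02-02',             # the bookmark from sync 1
    4: '1996-04-04',             # inserted below the bookmark
    6: bookmark,                 # the highest replication-key value
}
resynced = [pk for pk, key in remaining_rows.items() if key >= bookmark]
assert resynced == [6]           # only the previously bookmarked record is re-emitted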
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
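# A hypothetical helper (not part of tap-tester or this suite) sketching the
# pattern used just above and repeated below: the expected logical-replication
# record is simply the inserted fixture dict plus the SERIAL id Postgres
# assigned and a null _sdc_deleted_at. Shown only as a sketch; the test keeps
# doing this inline.
def expected_logical_record(inserted, assigned_id):
    """Return a copy of an inserted row shaped like the emitted upsert record."""
    expected = dict(inserted)           # leave the fixture dict untouched
    expected['id'] = assigned_id        # value taken by the SERIAL PRIMARY KEY
    expected['_sdc_deleted_at'] = None  # fresh inserts are never soft-deleted
    return expected

# expected_logical_record({'cow_name': 'betty cow', 'cow_age': 21}, 2)
# -> {'cow_name': 'betty cow', 'cow_age': 21, 'id': 2, '_sdc_deleted_at': None}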
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
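# The three checks repeated above for every deleted row reduce to one shape:
# a logically replicated DELETE surfaces as an 'upsert' message whose record
# carries a non-null _sdc_deleted_at and the deleted primary key. A
# hypothetical helper (not part of this suite) sketching that assertion:
def assert_delete_message(test, message, expected_id):
    test.assertEqual(message['action'], 'upsert')
    test.assertIsNotNone(message['data'].get('_sdc_deleted_at'))
    test.assertEqual(message['data']['id'], expected_id)

# e.g. assert_delete_message(
#          self,
#          records_by_stream['postgres_logical_replication_test']['messages'][2],
#          5)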
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
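Because a view exposes no primary key (the discovered metadata above reports 'table-key-properties': []), the test has to supply the key through metadata before selecting the stream. The snippet below restates that selection step in isolation; the calls are the same tap-tester helpers used above, and the values are illustrative:

    # declare 'id' as the view's key via view-key-properties
    replication_md = [{
        "breadcrumb": [],
        "metadata": {
            "replication-method": "FULL_TABLE",
            "replication-key": None,
            "view-key-properties": ["id"],
        },
    }]

    connections.select_catalog_and_fields_via_metadata(
        conn_id,
        chicken_catalog,
        menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']),
        replication_md)

The incremental variant of this test further down sets 'replication-key': 'updated_at' in the same structure, while the logical-replication variant expects the tap to exit with status 1, presumably because a view has no WAL stream to follow.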
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
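                # (Context for the TODO above, added as a hedged note: PostgreSQL accepts the
                #  special TIME input '24:00:00', but Python's datetime.time cannot represent it,
                #  since datetime.time(24, 0, 0) raises ValueError, so a client that maps TIME
                #  onto datetime.time has to wrap it to 00:00:00 or reject it. That is presumably
                #  why this record stops at '23:59:59.999999' as the usable maximum.)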
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
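        # (Going field by field also means that one oversized value, e.g. the 10485760-character
        #  padded our_char_big string, only fails its own subTest instead of drowning out the
        #  diff output for every other column.)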
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
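                # For reference, a field-level entry in the annotated-schema metadata looks
                # roughly like this (illustrative shape, not copied from real tap output):
                #   {"breadcrumb": ["properties", "id"],
                #    "metadata": {"sql-datatype": "integer", "inclusion": "automatic"}}
                # while the single top-level entry has "breadcrumb": [] and carries the
                # stream-wide properties (row-count, schema-name, database-name, is-view)
                # that the assertions below check.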
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
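As an illustrative aside (not part of the patch hunks), the assertions that follow compare the Postgres array literals inserted via rec_1 against nested Python lists with converted element types (BIT to bool, numerics to Decimal). A minimal standalone pairing of a few representative columns, copied from rec_1 and the expected record below:

import decimal

# Representative pairs: the literal handed to Postgres on insert, and the
# value the tap is expected to emit for that column.
inserted_literals = {
    'our_int_array':  '{{1,2,3},{4,5,6}}',   # INTEGER[][]
    'our_bit_array':  '{{0,1,1}}',            # BIT(1)[]
    'our_real_array': '{{76.33}}',            # REAL[]
}
expected_values = {
    'our_int_array':  [[1, 2, 3], [4, 5, 6]],
    'our_bit_array':  [[False, True, True]],
    'our_real_array': [[decimal.Decimal('76.33')]],
}
for column, literal in sorted(inserted_literals.items()):
    print(column, literal, '->', expected_values[column])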
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
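The expected_ts and expected_ts_tz helpers defined above establish the string format the tests expect for replicated timestamps. A minimal standalone sketch of that conversion, using the same America/New_York fixture timezone and one of the timestamps exercised in these tests:

import datetime
import pytz

# A naive timestamp localized to America/New_York is normalized to UTC and
# rendered with the same format string expected_ts_tz uses.
nyc_tz = pytz.timezone('America/New_York')
our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
our_ts_tz = nyc_tz.localize(our_ts)

expected = our_ts_tz.astimezone(pytz.utc).strftime("%Y-%m-%dT%H:%M:%S.%f+00:00")
assert expected == '1997-02-02T07:02:02.722184+00:00'   # EST is UTC-5 on this date
print(expected)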
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
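For readers tracking the breakdown above, a minimal standalone sketch of which row ids survive the pre-sync-3 manipulations in this full-table test (expected_records[i] corresponds to id i+1 throughout), matching the four upserts asserted here:

# Table membership by primary key across the three syncs.
rows = {1, 2, 3}        # rows present for syncs 1 and 2
rows |= {4, 5, 6}       # three new rows inserted before sync 3
# ids 1 and 5 are updated in place, which does not change membership
rows -= {2, 6}          # one existing and one newly inserted row are deleted
assert rows == {1, 3, 4, 5}   # the 4 rows a FULL_TABLE resync returns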
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
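The bookmark checks above pin replication_key_value to the highest 'OUR TS TZ' replicated so far. A minimal standalone sketch of the incremental behaviour the next sync's assertions rely on, namely that rows whose key is at or above the bookmark are re-emitted in ascending key order (which is why the previously bookmarked row comes back), using the same fixture timestamps that appear below:

import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')

def key(ts):
    # same formatting as expected_ts_tz above
    return nyc_tz.localize(ts).astimezone(pytz.utc).strftime("%Y-%m-%dT%H:%M:%S.%f+00:00")

rows = {  # id -> "OUR TS TZ" at the time of sync 2 (id 5 is inserted, then deleted)
    1: key(datetime.datetime(2021, 4, 4, 4, 4, 4, 733184)),    # updated above the bookmark
    2: key(datetime.datetime(1990, 4, 4, 4, 4, 4, 733184)),    # updated below the bookmark
    3: key(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)),    # bookmarked by sync 1
    4: key(datetime.datetime(1996, 4, 4, 4, 4, 4, 733184)),    # inserted below the bookmark
    6: key(datetime.datetime(2111, 1, 1, 12, 12, 12, 222111)), # inserted above the bookmark
}
bookmark = rows[3]
synced = sorted((pk for pk, value in rows.items() if value >= bookmark), key=rows.get)
assert synced == [3, 1, 6]   # matches messages[1:4]: records 3, 1 (updated) and 6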
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
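        # NB | The assertions below rest on two incremental-replication guarantees:
        #      (1) only rows whose replication-key value is greater than or equal to the
        #          bookmark saved by the previous sync are re-selected, which is why the
        #          previously bookmarked record comes back while the lower-valued insert
        #          and update do not, and
        #      (2) rows are emitted in ascending replication-key order.
        #      Roughly (illustrative only; `previous_bookmark` stands in for the saved state value):
        #          resynced = sorted((r for r in rows if r[expected_replication_key] >= previous_bookmark),
        #                            key=lambda r: r[expected_replication_key])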
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
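        # NB | A hard DELETE is invisible to replication-key (incremental) syncs: the tap
        #      simply re-selects rows at or above the saved bookmark, so the deleted row
        #      drops out of the result set rather than producing a delete record
        #      (capturing deletes is LOG_BASED behavior). The only row left at the bookmark
        #      value is the previously bookmarked record, hence exactly one upsert below.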
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
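                        # recreate the slot so this run decodes only WAL written after setUp;
                        # wal2json is the logical decoding output plugin the tap reads from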
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
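                # rec_3 for each table gets the same treatment: insert it, then fold in the
                # serial id Postgres assigns and _sdc_deleted_at (None for inserts) so the
                # fixture dicts can be compared directly against the target's upsert messages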
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
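                # check for a leftover 'stitch' slot from a previous run; if one exists it is
                # dropped over a replication connection before a fresh wal2json slot is created,
                # so the sync under test only sees WAL generated by this setUp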
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
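The bookmark assertions above repeat one pattern after every sync: currently_syncing must be None, the stream's lsn must be present and never move backwards, and version must still equal the table_version captured on the first sync. A minimal sketch of that invariant as a standalone helper; the function name assert_bookmark_progressed and the literal lsn/version values are illustrative only and not part of this test suite.

# Sketch only: restates the bookmark checks performed inline above.
def assert_bookmark_progressed(state, stream_id, prev_lsn, table_version):
    """Check the post-sync bookmark invariants and return the new lsn."""
    assert state['currently_syncing'] is None           # nothing should be mid-sync
    bookmark = state['bookmarks'][stream_id]
    assert bookmark['lsn'] is not None                   # logical replication always records an lsn
    assert bookmark['lsn'] >= prev_lsn                   # the lsn never decreases across syncs
    assert bookmark['version'] == table_version          # table_version does NOT change
    return bookmark['lsn']

# Hypothetical usage mirroring the flow above (values are made up):
if __name__ == '__main__':
    state = {'currently_syncing': None,
             'bookmarks': {'dev-public-postgres_logical_replication_test':
                           {'lsn': 23761872, 'version': 1570400000000}}}
    lsn = assert_bookmark_progressed(state,
                                     'dev-public-postgres_logical_replication_test',
                                     prev_lsn=23761000,
                                     table_version=1570400000000)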
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
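Both logical replication tests read changes only through the tap, but when a sync yields unexpected messages the same 'stitch' slot can be inspected directly. A debugging sketch, assuming the wal2json plugin is installed (as the fixture's create_replication_slot call requires) and reusing db_utils.get_test_connection as the tests do; peeking does not consume the changes, so it should not disturb what the next sync sees.

# Debugging sketch (not part of the test suite): peek at pending wal2json output
# for the 'stitch' slot without advancing the slot.
import json
import db_utils  # same helper module the tests import

def peek_wal2json_changes(slot_name='stitch', format_version='2'):
    changes = []
    with db_utils.get_test_connection('dev') as conn:
        with conn.cursor() as cur:
            # pg_logical_slot_peek_changes leaves the slot position untouched,
            # unlike pg_logical_slot_get_changes.
            cur.execute("SELECT lsn, xid, data "
                        "  FROM pg_logical_slot_peek_changes(%s, NULL, NULL, "
                        "                                    'format-version', %s)",
                        [slot_name, format_version])
            for lsn, xid, data in cur.fetchall():
                changes.append((lsn, xid, json.loads(data)))
    return changes

# With format-version 2 (the wal2json_message_format '2' used by this test class),
# each decoded payload is typically a single JSON document per row change
# ('I', 'U', 'D' actions) plus transaction markers.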
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
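The UPDATE above deliberately writes numeric values that JSON cannot carry: our_decimal and our_double are set to 'NaN' and our_real to '+Infinity'. The expected record further down treats all three as None (and our_money '$56.811' as the rounded '$56.81'), consistent with non-finite floats having no JSON representation. A minimal sketch of that mapping; the helper name json_safe_number is illustrative only.

import math
from decimal import Decimal

# Illustrative helper: non-finite numerics cannot be serialized as JSON numbers,
# so the expected records above assume they surface as null/None.
def json_safe_number(value):
    if value is None:
        return None
    as_float = float(value)
    if math.isnan(as_float) or math.isinf(as_float):
        return None
    return value

assert json_safe_number(Decimal('NaN')) is None          # our_decimal = 'NaN'
assert json_safe_number(float('+inf')) is None           # our_real = '+Infinity'
assert json_safe_number(float('nan')) is None            # our_double = 'NaN'
assert json_safe_number(Decimal('56.81')) == Decimal('56.81')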
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
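# A minimal sketch, not part of the original file: the message-order assertions above
# (activate_version, upsert, activate_version) recur across these full-table view tests
# and could be factored into one shared helper. The helper name and the assumption that
# each message is a dict carrying an 'action' key simply mirror the assertions above.
def assert_full_table_message_pattern(test, messages):
    """Expect a full-table sync to open and close with activate_version and to emit
    only upsert messages in between."""
    test.assertEqual('activate_version', messages[0]['action'])
    test.assertEqual('activate_version', messages[-1]['action'])
    for message in messages[1:-1]:
        test.assertEqual('upsert', message['action'])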
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
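# NB: Postgres itself accepts '24:00:00' as the top of the TIME range, but Python's
# datetime.time stops at 23:59:59.999999, so a '24:00:00' fixture value likely cannot
# round-trip through psycopg2; pinning the fixture to '23:59:59.999999' sidesteps that
# limit (editor's assumption, added for context).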
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
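# Editor's note: a small illustrative sketch of the metadata structure the discovery
# assertions above and below iterate over. The entries here are assumptions about the
# shape of menagerie.get_annotated_schema()['metadata'] (an empty breadcrumb for
# stream-level properties, ['properties', <field>] per column), not values captured
# from a real discovery run.
stream_metadata = [
    {'breadcrumb': [], 'metadata': {'table-key-properties': ['id'], 'is-view': False, 'row-count': 500}},
    {'breadcrumb': ['properties', 'id'], 'metadata': {'inclusion': 'automatic', 'sql-datatype': 'integer'}},
    {'breadcrumb': ['properties', 'our_varchar'], 'metadata': {'inclusion': 'available', 'sql-datatype': 'character varying'}},
    {'breadcrumb': ['properties', 'invalid_xml'], 'metadata': {'inclusion': 'unsupported', 'sql-datatype': 'xml'}},
]
automatic = {m['breadcrumb'][1] for m in stream_metadata[1:] if m['metadata']['inclusion'] == 'automatic'}
unsupported = {m['breadcrumb'][1] for m in stream_metadata[1:] if m['metadata']['inclusion'] == 'unsupported'}
assert automatic == {'id'}            # only primary keys end up automatic for this tap
assert unsupported == {'invalid_xml'}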
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
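# Editor's note: before the upsert data is compared below, an illustrative sketch (not
# output from a real sync) of how a couple of the Postgres array literals inserted
# above are assumed to come back in the replicated record: string literals become
# nested lists, and a 2-D integer array keeps its shape.
inserted = {
    'our_int_array': '{{1,2,3},{4,5,6}}',        # 2-D INTEGER[][] literal
    'our_mac_array': '{{08:00:2b:01:02:03}}',    # MACADDR[] literal
}
expected = {
    'our_int_array': [[1, 2, 3], [4, 5, 6]],
    'our_mac_array': [['08:00:2b:01:02:03']],
}
# A toy parser for the integer case only, just to make the expected shape explicit.
def parse_int_array(literal):
    rows = literal.strip('{}').split('},{')
    return [[int(x) for x in row.split(',')] for row in rows]
assert parse_int_array(inserted['our_int_array']) == expected['our_int_array']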
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
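# Editor's note: a self-contained sketch of the timestamp normalisation performed by
# the expected_ts()/expected_ts_tz() helpers above, showing why expectations are
# derived from the inserted datetimes rather than hard-coded strings: the UTC offset
# depends on whether the localized America/New_York timestamp falls inside daylight
# saving time. The summer example date is invented purely for illustration.
import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
winter = nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))   # EST, UTC-5
summer = nyc_tz.localize(datetime.datetime(1996, 7, 2, 2, 2, 2, 722184))   # EDT, UTC-4

def expected_ts_tz(our_ts_tz):
    return datetime.datetime.strftime(our_ts_tz.astimezone(pytz.utc),
                                      "%Y-%m-%dT%H:%M:%S.%f+00:00")

assert expected_ts_tz(winter) == '1997-02-02T07:02:02.722184+00:00'  # +5h to UTC
assert expected_ts_tz(summer) == '1996-07-02T06:02:02.722184+00:00'  # +4h to UTC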
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
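[Editor's note: a minimal stand-alone sketch, not part of the patch. The first-sync assertions above compare emitted timestamps against the expected_ts_tz helper defined earlier in this test and check replication-key ordering with assertLess on the emitted strings. The helper shifts the zone-aware value to UTC before formatting, and fixed-offset ISO-8601 strings sort lexicographically in chronological order, which is what makes those string comparisons valid. Values below mirror records 2 and 3; this assumes pytz is installed, as the tests already do.]

    import datetime
    import pytz

    nyc_tz = pytz.timezone('America/New_York')

    def expected_ts_tz(our_ts_tz):
        # same normalization as the test helper: shift to UTC, format with an explicit +00:00 offset
        return datetime.datetime.strftime(our_ts_tz.astimezone(pytz.utc),
                                          "%Y-%m-%dT%H:%M:%S.%f+00:00")

    rec_2 = expected_ts_tz(nyc_tz.localize(datetime.datetime(1987, 2, 2, 2, 2, 2, 722184)))
    rec_3 = expected_ts_tz(nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)))

    assert rec_2 == '1987-02-02T07:02:02.722184+00:00'   # February in New York is EST (UTC-5)
    assert rec_3 == '1997-02-02T07:02:02.722184+00:00'
    assert rec_2 < rec_3   # lexicographic order of the strings agrees with chronological order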
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
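[Editor's note: before the detailed record checks that follow, it may help to restate the selection rule the NB comment above describes: an INCREMENTAL sync re-emits every row whose replication-key value is greater than or equal to the previous bookmark, so the bookmarked row itself comes back, rows updated above the bookmark come back, and rows inserted, updated, or deleted below the bookmark never appear; a hard-deleted row above the bookmark simply vanishes. The sketch below is illustrative only, not the tap's implementation; the ids and UTC values mirror the records set up above (offsets depend on DST at each date).]

    # previous bookmark: record 3's "OUR TS TZ" value from sync 1
    bookmark = '1997-02-02T07:02:02.722184+00:00'

    # replication-key values visible in the table before sync 2
    table = {
        1: '2021-04-04T08:04:04.733184+00:00',  # updated above the bookmark -> replicated
        2: '1990-04-04T08:04:04.733184+00:00',  # updated below the bookmark -> skipped
        3: '1997-02-02T07:02:02.722184+00:00',  # the bookmarked row itself  -> replicated
        4: '1996-04-04T09:04:04.733184+00:00',  # inserted below the bookmark -> skipped
        6: '2111-01-01T17:12:12.222111+00:00',  # inserted above the bookmark -> replicated
    }                                            # id 5 was deleted, so it is simply absent

    synced = sorted((value, rec_id) for rec_id, value in table.items() if value >= bookmark)
    assert [rec_id for _, rec_id in synced] == [3, 1, 6]   # ascending replication-key order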
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
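The repeated three-step pattern here — insert a record through the parameterized insert_record helper, then extend the same fixture dict with the serial id and a null _sdc_deleted_at — is what lets the test compare its fixture dicts directly against the upsert messages the tap emits on the next sync. A minimal sketch of that pattern follows; it is not taken from the test file, assumes only a psycopg2 connection and unquoted column names, and uses a RETURNING clause as an illustrative alternative to hard-coding the serial id:

    from psycopg2.extensions import quote_ident

    def insert_and_expect(conn, table_name, data):
        """Insert `data` and return the dict an upsert message for that row should carry."""
        with conn.cursor() as cur:
            keys = sorted(data.keys())
            columns_sql = ", ".join(keys)
            value_sql = ", ".join(["%s"] * len(keys))
            insert_sql = "INSERT INTO {} ({}) VALUES ({}) RETURNING id".format(
                quote_ident(table_name, cur), columns_sql, value_sql)
            cur.execute(insert_sql, [data[k] for k in keys])
            new_id = cur.fetchone()[0]

        expected = dict(data)
        expected['id'] = new_id             # serial primary key assigned by Postgres
        expected['_sdc_deleted_at'] = None  # a plain insert is never soft-deleted
        return expected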
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
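The delete scenarios above and the update scenario that follows all lean on the same two checks: a row removed (or changed) upstream arrives as an 'upsert' message, with _sdc_deleted_at populated only for deletes, and the stream's bookmark keeps a monotonically non-decreasing lsn while its table version never changes. A minimal sketch of those checks, using hypothetical helper names that are not part of the test file:

    def assert_soft_delete(message, expected_id):
        # a hard DELETE in the source surfaces as an upsert carrying _sdc_deleted_at
        assert message['action'] == 'upsert'
        assert message['data']['id'] == expected_id
        assert message['data'].get('_sdc_deleted_at') is not None

    def assert_bookmark_advanced(bookmark, previous_lsn, table_version):
        # log-based replication only ever moves the lsn forward within a slot
        assert bookmark['lsn'] >= previous_lsn
        # inserts, updates, and deletes never bump the table's activate_version
        assert bookmark['version'] == table_version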
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff))
-
-        # verify that persisted streams have the correct properties
-        chicken_catalog = found_catalogs[0]
-
-        self.assertEqual('chicken_view', chicken_catalog['stream_name'])
-        print("discovered streams are correct")
-
-        print('checking discovered metadata for ROOT-CHICKEN_VIEW')
-        md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata']
-
-        self.assertEqual(
-            {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []},
-             ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True},
-             ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
-             ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True},
-             ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True},
-             ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}},
-            metadata.to_map(md))
-
-
-        # 'ID' selected as view-key-properties
-        replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}]
-
-        connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog,
-                                                           menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']),
-                                                           replication_md)
-
-        # clear state
-        menagerie.set_state(conn_id, {})
-
-        sync_job_name = runner.run_sync_mode(self, conn_id)
-
-        # verify tap and target exit codes
-        exit_status = menagerie.get_exit_status(conn_id, sync_job_name)
-        menagerie.verify_sync_exit_status(self, exit_status, sync_job_name)
-
-        record_count_by_stream = runner.examine_target_output_file(self,
-                                                                   conn_id,
-                                                                   self.expected_sync_streams(),
-                                                                   self.expected_pks())
-
-
-        self.assertEqual(record_count_by_stream, { 'chicken_view': 1})
-        records_by_stream = runner.get_records_from_target_output()
-
-        table_version = records_by_stream['chicken_view']['table_version']
-        self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version')
-        self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert')
-        self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version')
-
-        # verifications about individual records
-        for stream, recs in records_by_stream.items():
-            # verify the persisted schema was correct
-            self.assertEqual(recs['schema'],
-                             expected_schemas[stream],
-                             msg="Persisted schema did not match expected schema for stream `{}`.".format(stream))
-
-        actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data']
-
-        expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'}
-        self.assertEqual(actual_chicken_record,
-                         expected_chicken_record,
-                         msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record))
-
-        print("records are correct")
-
-        # verify state and bookmarks
-        state = menagerie.get_state(conn_id)
-
-        chicken_bookmark = state['bookmarks']['postgres-public-chicken_view']
-        self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None")
-        self.assertEqual(chicken_bookmark['version'], table_version,
-                         msg="expected bookmark for stream ROOT-CHICKEN to match version")
-
-
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
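                # NOTE (assumption, not verified here): PostgreSQL itself accepts '24:00:00'
                # as a TIME input, but Python's datetime.time cannot represent it, so the
                # value may round-trip through the driver as 00:00:00; '23:59:59.999999' is
                # used as the maximum instead because it survives the round trip unchanged.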
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
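        # (With subTest, every mismatched field is reported as its own failure rather
        #  than aborting the record comparison at the first bad value, which keeps the
        #  output readable when a record carries multi-megabyte string fields.)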
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
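                # Illustrative shape of the metadata entries inspected below (example values
                # only, not copied from a real catalog):
                #   {"breadcrumb": [], "metadata": {"table-key-properties": ["id"],
                #       "row-count": 500, "schema-name": "public", "database-name": "discovery1"}}
                #   {"breadcrumb": ["properties", "id"],
                #       "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}}
                #   {"breadcrumb": ["properties", "invalid_xml"],
                #       "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}}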
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
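        # tap_stream_id values follow the '<database>-<schema>-<table>' convention, so the
        # filter above keeps only 'dev-public-postgres_full_table_replication_array_test'.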
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
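        # (A full-table sync of this single-row table yields exactly three messages:
        #  activate_version, upsert, activate_version, which the assertions here check
        #  individually.)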
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
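# NB | Sketch of what the expected_ts()/expected_ts_tz() helpers above produce (pure
#      datetime/pytz, no database needed, not part of the patch). America/New_York is UTC-5
#      in winter and UTC-4 in summer, which is why the fixtures derive the expected strings
#      from the localized value instead of hard-coding a single offset.
import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
fmt = "%Y-%m-%dT%H:%M:%S.%f+00:00"

winter = nyc_tz.localize(datetime.datetime(1997, 2, 2, 2, 2, 2, 722184))
summer = nyc_tz.localize(datetime.datetime(1996, 7, 1, 2, 2, 2, 722184))

print(winter.astimezone(pytz.utc).strftime(fmt))  # 1997-02-02T07:02:02.722184+00:00 (EST, UTC-5)
print(summer.astimezone(pytz.utc).strftime(fmt))  # 1996-07-01T06:02:02.722184+00:00 (EDT, UTC-4)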
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
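# NB | Illustrative sketch (plain Python, not the tap's implementation) of the bookmark shape
#      asserted above and of the selection rule the following assertions rely on: an
#      INCREMENTAL stream re-selects rows whose replication-key value is greater than or
#      equal to the bookmarked value, so the bookmarked row itself is emitted again.
example_bookmark = {
    'version': 1614556800000,                                  # placeholder table version
    'replication_key': 'OUR TS TZ',
    'replication_key_value': '1997-02-02T07:02:02.722184+00:00',
}

candidate_rows = [
    '1964-07-01T05:00:00.000000+00:00',    # below the bookmark -> not replicated again
    '1997-02-02T07:02:02.722184+00:00',    # the bookmarked row -> replicated again
    '2007-01-01T17:12:12.222111+00:00',    # above the bookmark -> replicated
]
replicated = [row for row in candidate_rows if row >= example_bookmark['replication_key_value']]
assert replicated == candidate_rows[1:]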
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
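# NB | The ascending-order assertions just below work because the expected replication-key
#      values are ISO-8601 UTC strings, so lexicographic (string) comparison agrees with
#      chronological order. A tiny self-contained check, with values derived from the
#      fixtures above (not part of the patch):
sync_2_replication_key_values = [
    '1997-02-02T07:02:02.722184+00:00',    # previously bookmarked record
    '2021-04-04T08:04:04.733184+00:00',    # record 1 after its update
    '2111-01-01T17:12:12.222111+00:00',    # newly inserted record 6
]
assert sync_2_replication_key_values == sorted(sync_2_replication_key_values)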
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that cow's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) 
-import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_automatic_fields_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_automatic_fields_test" -test_db = "dev" - -class PostgresAutomaticFields(unittest.TestCase): - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - default_replication_method = "" - - def tearDown(self): - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - 
db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 19972, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 19972, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 
'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 19873, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 19873, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_automatic_fields_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_automatic_fields_test' } - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_automatic_fields_test' : {'id'} - } - - def expected_replication_keys(self): - replication_keys = { - 'postgres_automatic_fields_test' : {'our_integer'} - } - - if self.default_replication_method == self.INCREMENTAL: - return replication_keys - else: - return {'postgres_automatic_fields_test' : set()} - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_automatic_fields" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - 'default_replication_method' : 
self.FULL_TABLE, - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = False): - """Select all streams and all fields within streams or all streams and no fields.""" - - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - - if self.default_replication_method is self.FULL_TABLE: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.FULL_TABLE} - }] - - elif self.default_replication_method is self.INCREMENTAL: - additional_md = [{ - "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "our_integer" - } - }] - - else: - additional_md = [{ - "breadcrumb": [], "metadata": {"replication-method": self.LOG_BASED} - }] - - non_selected_properties = [] - if not select_all_fields: - # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}).keys() - - connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md, non_selected_properties) - - - def test_run(self): - """Parametrized automatic fields test running against each replication method.""" - - # Test running a sync with no fields selected using full-table replication - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self) - self.automatic_fields_test(full_table_conn_id) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys. - # As a result we cannot run a sync with no fields selected. This BUG should not - # be carried over into hp-postgres, but will not be fixed for this tap. - - # Test running a sync with no fields selected using key-based incremental replication - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # self.automatic_fields_test(incremental_conn_id) - - # Test running a sync with no fields selected using logical replication - self.default_replication_method = self.LOG_BASED - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - db_utils.ensure_replication_slot(cur, test_db) - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.automatic_fields_test(log_based_conn_id) - - - def automatic_fields_test(self, conn_id): - """Just testing we can sync with no fields selected. 
And that automatic fields still get synced.""" - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and NO FIELDS within the table'.format(test_table_name)) - self.select_streams_and_fields(conn_id, test_catalog, select_all_fields=False) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # expected values - expected_primary_keys = self.expected_primary_keys()[test_table_name] - expected_replication_keys = self.expected_replication_keys()[test_table_name] - expected_automatic_fields = expected_primary_keys.union(expected_replication_keys) - - # collect actual values - record_messages_keys = [set(message['data'].keys()) for message in messages[1:-1]] - - # verify the message actions match expectations for all replication methods - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # Verify that you get some records for each stream - self.assertGreater(record_count_by_stream[test_table_name], 0) - - # Verify that only the automatic fields are sent to the target - for actual_fields in record_messages_keys: - self.assertSetEqual(expected_automatic_fields, actual_fields) - - -SCENARIOS.add(PostgresAutomaticFields) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error -import datatype_file_reader as dfr # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_datatypes_test" -test_db = "dev" - - -class PostgresDatatypes(unittest.TestCase): - """ - TODO | My Running list - - - Arbitrary Precision Numbers - Numeric | exact up to 131072 digits before the decimal point; up to 16383 digits after the 
decimal point - when precision is explicitly stated, maximum is 1000 digits - TODOs - - Generate 3 different fields with NUMERIC, NUMERIC(precision, scale), NUMERIC(precision). - - Cover Maximum precision and scale - - Cover Minimum precision and scale - - Cover NaN - - - Floating-Point Types - - usually implementations of IEEE Standard 754 for Binary Floating-Point Arithmetic - - on most platforms, the real type has a range of at least 1E-37 to 1E+37 with a precision of at least 6 decimal digits - - double precision type typically has a range of around 1E-307 to 1E+308 with a precision of at least 15 digits - - numbers too close to zero that are not representable as distinct from zero will cause an underflow error. - TODOs - - Cover NaN, -Inf, Inf - - - - - Character - - - TODOS - - Generate different fields with VARCHAR, VARCHAR(n), CHAR, CHAR(n) - - VARCHAR(10485760) - - Generate a 1 GB string?? - - Binary Types - Bytea | binary string, sequence of octets can be written in hex or escape - TODOs - - Generate different fields for hex and escape - - - Network Address Types - TODOs - - Do with and without 'y' where input is number of bits in the netmask: input looks like 'address/y' - - For inet/cidr 'y' will default ot 32 for ipv4 and 128 for ipv6 - - For mac do all the input formats - [] '08:00:2b:01:02:03' - [] '08-00-2b-01-02-03' - [] '08002b:010203' - [] '08002b-010203' - [] '0800.2b01.0203' - [] '08002b010203' - - - Datestimes - TODOs - - Test values with second, millisecond and micrsecond precision - - Boolean - TODOs - - Enter all accpetable inputs for True: - TRUE - 't' - 'true' - 'y' - 'yes' - '1' - - Enter all acceptable inputs for False: - FALSE - 'f' - 'false' - 'n' - 'no' - '0' - """ - - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_big VARCHAR(10485760), - our_char CHAR, - our_char_big CHAR(10485760), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - 
our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - our_bigserial BIGSERIAL, - invalid_bit BIT(80), - invalid_bit_varying BIT VARYING(80), - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - our_serial SERIAL, - our_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 wtih minimum values - min_ts = datetime.datetime(1, 1, 1, 0, 0, 0, 000000) - our_tz = pytz.timezone('Singapore') # GMT+8 - #min_ts_tz = our_tz.localize(min_ts) # TODO - # our_time = datetime.time(0, 0, 0) - # our_time_tz = our_time.isoformat() + "-04:00" - # our_date = datetime.date(1998, 3, 4) - min_date = datetime.date(1, 1, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'id': 1,# SERIAL PRIMARY KEY, - 'our_char': "a", # CHAR, - 'our_varchar': "", # VARCHAR, - 'our_varchar_big': "", # VARCHAR(10485760), - 'our_char_big': "a", # CHAR(10485760), - 'our_text': " ", # TEXT - 'our_text_2': "", # TEXT, - 'our_integer': -2147483648, # INTEGER, - 'our_smallint': -32768, # SMALLINT, - 'our_bigint': -9223372036854775808, # BIGINT, - 'our_decimal': decimal.Decimal(0.000000), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): min_ts, # '4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): min_ts,#_tz, #'4713-01-01 00:00:00.000000 BC', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '00:00:00', # TIME WITHOUT TIME ZONE, - quote_ident('OUR TIME TZ', cur): '00:00:00+1459', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): min_date,# '4713-01-01 BC', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': False, # BOOLEAN, - 'our_bit': '0', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '12.244.233.165/32', # cidr, - 'our_inet': '12.244.233.165/32', # inet, - 'our_mac': '08:00:2b:01:02:04',#'12.244.233.165/32', # macaddr, - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': '-$92,233,720,368,547,758.08', # money, TODO THis throws pyscopg error - 'our_bigserial': 1, # BIGSERIAL, - 'invalid_bit_varying': 80 * '0', # BIT VARYING(80), - 'invalid_bit': 80 * '0', # BIT(80), - 'invalid_box': None, # BOX, - 'invalid_bytea': "E'\\000'", # BYTEA, - 'invalid_circle': None, # CIRCLE, - 'invalid_interval': '-178000000 years', # INTERVAL, - 'invalid_line': None, # LINE, - 'invalid_lseg': None, # LSEG, - 'invalid_path': None, # PATH, - 'invalid_pg_lsn': None, # PG_LSN, - 'invalid_point': None, # POINT, - 'invalid_polygon': None, # POLYGON, - 'our_serial': 1, # SERIAL, - 'our_smallserial': 1, # SMALLSERIAL, - 'invalid_tsquery': None, # TSQUERY, - 'invalid_tsvector': None, # TSVECTOR, - 'invalid_txid_snapshot': None, # TXID_SNAPSHOT, - 'invalid_xml': None, # 
XML) - }) - self.expected_records.append({ - 'id': 1, - 'our_char': "a", - 'our_varchar': "", - 'our_varchar_big': "", - 'our_char_big': "a" + (10485760 - 1) * " ", # padded - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': -2147483648, - 'our_smallint': -32768, - 'our_bigint': -9223372036854775808, - 'our_decimal': '0.000000', - 'OUR TS': '0001-01-01T00:00:00+00:00', - 'OUR TS TZ': '0001-01-01T00:00:00+00:00', - 'OUR TIME': '00:00:00', - 'OUR TIME TZ': '00:00:00+14:59', - 'OUR DATE': '0001-01-01T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': False, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[0]) - - - # record 2 wtih maximum values - max_ts = datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) - # our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - # nyc_tz = pytz.timezone('America/New_York') - # our_ts_tz = nyc_tz.localize(our_ts) - # our_time = datetime.time(12,11,10) - # our_time_tz = our_time.isoformat() + "-04:00" - max_date = datetime.date(9999, 12, 31) - my_uuid = str(uuid.uuid1()) - base_string = "Bread Sticks From Olive Garden" - self.inserted_records.append({ - 'id': 2147483647, # SERIAL PRIMARY KEY, - 'our_char': None, # CHAR, - 'our_varchar': None, # VARCHAR, - 'our_varchar_big': "Bread Sticks From Olive Garden", # VARCHAR(10485760), - 'our_char_big': base_string + " " * (10485760 - len(base_string)), # CHAR(10485760), - 'our_text': dfr.read_in("text"), # TEXT, - 'our_text_2': None, # TEXT, - 'our_integer': 2147483647, # INTEGER, - 'our_smallint': 32767, # SMALLINT, - 'our_bigint': 9223372036854775807, # BIGINT, - 'our_decimal':decimal.Decimal('9876543210.02'), # NUMERIC(12,2), # TODO - quote_ident('OUR TS', cur): max_ts,# '9999-12-31 24:00:00.000000',# '294276-12-31 24:00:00.000000', # TIMESTAMP WITHOUT TIME ZONE, - quote_ident('OUR TS TZ', cur): max_ts, #'294276-12-31 24:00:00.000000', # TIMESTAMP WITH TIME ZONE, - quote_ident('OUR TIME', cur): '23:59:59.999999',# '24:00:00.000000' ->, # TIME WITHOUT TIME ZONE, - # '24:00:00.000000' -> 00:00:00 TODO BUG? 
- quote_ident('OUR TIME TZ', cur): '23:59:59.999999-0000', # TIME WITH TIME ZONE, - quote_ident('OUR DATE', cur): '5874897-12-31', # DATE, - 'our_double': None, # DOUBLE PRECISION, - 'our_real': None, # REAL, # TODO - 'our_boolean': True, # BOOLEAN - 'our_bit': '1', # BIT(1), - 'our_json': None, # JSON, - 'our_jsonb': None, # JSONB, - 'our_uuid': None, # UUID, - 'our_store': None, # HSTORE, - 'our_citext': None, # CITEXT, - 'our_cidr': '2001:0db8:0000:0000:0000:ff00:0042:7879/128', # cidr, - 'our_inet': '12.244.233.165/32',# TODO IPV6 value is rejected by pyscopg '2001:0db8:2222:3333:ghdk:ff00:0042:7879/128', # inet, - 'our_mac': '08:00:2b:01:02:03', # macaddr - 'our_alignment_enum': None, # ALIGNMENT, - 'our_money': "$92,233,720,368,547,758.07", # money, - 'our_bigserial': 9223372036854775807, # BIGSERIAL, - 'our_serial': 2147483647, # SERIAL, - 'our_smallserial': 32767, #2147483647, # SMALLSERIAL, - 'invalid_bit_varying': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT VARYING(80), - 'invalid_bit': '01110100011000010111000000101101011101000110010101110011011101000110010101110010', # BIT(80), - 'invalid_box': '((50, 50), (0, 0))', # BOX, - 'invalid_bytea': "E'\\255'", # BYTEA, - 'invalid_circle': '< (3, 1), 4 >', # CIRCLE, - 'invalid_interval': '178000000 years', # INTERVAL, - 'invalid_line': '{6, 6, 6}', # LINE, - 'invalid_lseg': None,# (0, 0 , 45, 90, 2, 56), # LSEG, - 'invalid_path': '((0, 0), (45, 90), (2, 56))', # PATH, - 'invalid_pg_lsn': '16/B374D848', # PG_LSN, - 'invalid_point': '(1, 2)', # POINT, - 'invalid_polygon': '((0, 0), (0, 10), (10, 0), (4, 5), (6, 7))', # POLYGON, - 'invalid_tsquery': "'fat' & 'rat'", # TSQUERY, - 'invalid_tsvector': "'fat':2 'rat':3", # TSVECTOR, - 'invalid_txid_snapshot': '10:20:10,14,15', # TXID_SNAPSHOT, - 'invalid_xml': 'bar', # XML) - }) - self.expected_records.append({ - 'id': 2147483647, - 'our_char': self.inserted_records[-1]['our_char'], - 'our_varchar': self.inserted_records[-1]['our_varchar'], - 'our_varchar_big': self.inserted_records[-1]['our_varchar_big'], - 'our_char_big': self.inserted_records[-1]['our_char_big'], - 'our_text': self.inserted_records[-1]['our_text'], - 'our_text_2': self.inserted_records[-1]['our_text_2'], - 'our_integer': 2147483647, - 'our_smallint': 32767, - 'our_bigint': 9223372036854775807, - 'our_decimal':decimal.Decimal('9876543210.02'), # TODO - 'OUR TS': '9999-12-31T23:59:59.999999+00:00', - 'OUR TS TZ': '9999-12-31T23:59:59.999999+00:00', - 'OUR TIME': '23:59:59.999999', - 'OUR TIME TZ': '23:59:59.999999+00:00', - 'OUR DATE': '9999-12-31T00:00:00+00:00', - 'our_double': None, - 'our_real': None, - 'our_boolean': self.inserted_records[-1]['our_boolean'], - 'our_bit': True, - 'our_json': None, - 'our_jsonb': None, - 'our_uuid': None, - 'our_store': None, - 'our_citext': None, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_alignment_enum': None, - 'our_money': self.inserted_records[-1]['our_money'], - 'our_bigserial': self.inserted_records[-1]['our_bigserial'], - 'our_serial': self.inserted_records[-1]['our_serial'], - 'our_smallserial': self.inserted_records[-1]['our_smallserial'], - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[1]) - - - @staticmethod - def expected_check_streams(): - return { 'postgres_datatypes_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_datatypes_test'} - - def 
expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_datatypes_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', - 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_datatypes" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'dev' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized datatypes test running against each replication method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): - """ - Test Description: - Basic Datatypes Test for a database tap. - - Test Cases: - - """ - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_stream_ids()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the number of records and number of messages match our expectations - expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions - self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) - self.assertEqual(expected_message_count, len(messages)) - - # verify we start and end syncs with an activate version message - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) - - # verify the remaining messages are upserts - actions = {message['action'] for message in messages if message['action'] != 'activate_version'} - self.assertSetEqual({'upsert'}, actions) - - - # NB | assertDictEquals gives ugly output due to HUGE string values in our records so - # use the subTest pattern and go value by value instead. 
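The subTest pattern referenced above keeps the comparison going after the first mismatch and labels each failure with the field name, which is why the per-field loops that follow read better than one assertDictEqual over records containing huge string values. A minimal, self-contained sketch of the pattern (class and field names here are illustrative, not taken from the tap tests):

    import unittest

    class FieldByFieldComparison(unittest.TestCase):
        def test_fields(self):
            expected = {'id': 1, 'our_money': '$100.11'}
            actual = {'id': 1, 'our_money': None}
            for field, want in expected.items():
                # each subTest failure is reported separately and the loop keeps running
                with self.subTest(field=field):
                    self.assertEqual(want, actual.get(field, "MISSING FIELD"))
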
- - - # verify expected minimum values were replicated - expected_record_mins = self.expected_records[0] - for key in expected_record_mins.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_mins[key], messages[1]['data'].get(key, "MISSING FIELD")) - - - # verify expected maximum values were replicated - expected_record_maxes = self.expected_records[1] - for key in expected_record_maxes.keys(): - with self.subTest(field=key): - self.assertEqual(expected_record_maxes[key], messages[2]['data'].get(key, "MISSING FIELD")) - - -SCENARIOS.add(PostgresDatatypes) -import os -import datetime -import unittest -import decimal -import uuid -import json - -from psycopg2.extensions import quote_ident -import psycopg2.extras -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_discovery_test" -test_db = "discovery1" - - -class PostgresDiscovery(unittest.TestCase): - AUTOMATIC_FIELDS = "automatic" - REPLICATION_KEYS = "valid-replication-keys" - PRIMARY_KEYS = "table-key-properties" - FOREIGN_KEYS = "table-foreign-key-properties" - REPLICATION_METHOD = "forced-replication-method" - API_LIMIT = "max-row-limit" - INCREMENTAL = "INCREMENTAL" - FULL_TABLE = "FULL_TABLE" - LOG_BASED = "LOG_BASED" - - UNSUPPORTED_TYPES = { - "BIGSERIAL", - "BIT VARYING", - "BOX", - "BYTEA", - "CIRCLE", - "INTERVAL", - "LINE", - "LSEG", - "PATH", - "PG_LSN", - "POINT", - "POLYGON", - "SERIAL", - "SMALLSERIAL", - "TSQUERY", - "TSVECTOR", - "TXID_SNAPSHOT", - "XML", - } - default_replication_method = "" - - def tearDown(self): - pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db(test_db) - self.maxDiff = None - - with db_utils.get_test_connection(test_db) as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.ensure_replication_slot(cur, test_db) - - canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money, - invalid_bigserial BIGSERIAL, - invalid_bit_varying BIT VARYING, - invalid_box BOX, - invalid_bytea BYTEA, - invalid_circle CIRCLE, - invalid_interval INTERVAL, - invalid_line LINE, - invalid_lseg LSEG, - invalid_path PATH, - invalid_pg_lsn PG_LSN, - invalid_point POINT, - invalid_polygon POLYGON, - invalid_serial SERIAL, - invalid_smallserial SMALLSERIAL, - invalid_tsquery TSQUERY, - invalid_tsvector 
TSVECTOR, - invalid_txid_snapshot TXID_SNAPSHOT, - invalid_xml XML) - """.format(canonicalized_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.recs = [] - for _ in range(500): - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - record = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - db_utils.insert_record(cur, test_table_name, record) - self.recs.append(record) - - cur.execute("""ANALYZE {}""".format(canonicalized_table_name)) - - @staticmethod - def expected_check_streams(): - return { 'postgres_discovery_test'} - - def expected_check_stream_ids(self): - """A set of expected table names in format""" - check_streams = self.expected_check_streams() - return {"{}-{}-{}".format(test_db, test_schema_name, stream) for stream in check_streams} - - @staticmethod - def expected_primary_keys(): - return { - 'postgres_discovery_test' : {'id'} - } - - @staticmethod - def expected_unsupported_fields(): - return { - 'invalid_bigserial', - 'invalid_bit_varying', - 'invalid_box', - 'invalid_bytea', - 'invalid_circle', - 'invalid_interval', - 'invalid_line', - 'invalid_lseg', - 'invalid_path', - 'invalid_pg_lsn', - 'invalid_point', - 'invalid_polygon', - 'invalid_serial', - 'invalid_smallserial', - 'invalid_tsquery', - 'invalid_tsvector', - 'invalid_txid_snapshot', - 'invalid_xml', - } - @staticmethod - def expected_schema_types(): - return { - 'id': 'integer', # 'serial primary key', - 'our_varchar': 'character varying', # 'varchar' - 'our_varchar_10': 'character varying', # 'varchar(10)', - 'our_text': 'text', - 'our_text_2': 'text', - 'our_integer': 'integer', - 'our_smallint': 'smallint', - 'our_bigint': 'bigint', - 'our_decimal': 'numeric', - 'OUR TS': 'timestamp without time zone', - 'OUR TS TZ': 'timestamp with time zone', - 'OUR TIME': 'time without time zone', - 'OUR TIME TZ': 'time with time zone', - 'OUR DATE': 'date', - 'our_double': 'double precision', - 'our_real': 'real', - 'our_boolean': 'boolean', - 'our_bit': 'bit', - 'our_json': 'json', - 'our_jsonb': 'jsonb', - 'our_uuid': 'uuid', - 'our_store': 'hstore', - 'our_citext': 'citext', 
- 'our_cidr': 'cidr', - 'our_inet': 'inet', - 'our_mac': 'macaddr', - 'our_alignment_enum': 'alignment', - 'our_money': 'money', - 'invalid_bigserial': 'bigint', - 'invalid_bit_varying': 'bit varying', - 'invalid_box': 'box', - 'invalid_bytea': 'bytea', - 'invalid_circle': 'circle', - 'invalid_interval': 'interval', - 'invalid_line': 'line', - 'invalid_lseg': 'lseg', - 'invalid_path': 'path', - 'invalid_pg_lsn': 'pg_lsn', - 'invalid_point': 'point', - 'invalid_polygon': 'polygon', - 'invalid_serial': 'integer', - 'invalid_smallserial': 'smallint', - 'invalid_tsquery': 'tsquery', - 'invalid_tsvector': 'tsvector', - 'invalid_txid_snapshot': 'txid_snapshot', - 'invalid_xml': 'xml', - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_discovery" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - def get_properties(self, original_properties=True): - return_value = { - 'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : self.FULL_TABLE, - 'filter_dbs' : 'discovery1' - } - if not original_properties: - if self.default_replication_method is self.LOG_BASED: - return_value['wal2json_message_format'] = '1' - - return_value['default_replication_method'] = self.default_replication_method - - return return_value - - def test_run(self): - """Parametrized discovery test running against each replicatio method.""" - - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(full_table_conn_id) - - self.default_replication_method = self.INCREMENTAL - incremental_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(incremental_conn_id) - - # NB | We are able to generate a connection and run discovery with a default replication - # method of logical replication WITHOUT selecting a replication slot. This is not - # ideal behavior. This BUG should not be carried over into hp-postgres, but will not - # be fixed for this tap. - self.default_replication_method = self.LOG_BASED - log_based_conn_id = connections.ensure_connection(self, original_properties=False) - self.discovery_test(log_based_conn_id) - - def discovery_test(self, conn_id): - """ - Basic Discovery Test for a database tap. - - Test Description: - Ensure discovery runs without exit codes and generates a catalog of the expected form - - Test Cases: - - Verify discovery generated the expected catalogs by name. - - Verify that the table_name is in the format for each stream. - - Verify the caatalog is found for a given stream. - - Verify there is only 1 top level breadcrumb in metadata for a given stream. - - Verify replication key(s) match expectations for a given stream. - - Verify primary key(s) match expectations for a given stream. - - Verify the replication method matches our expectations for a given stream. - - Verify that only primary keys are given the inclusion of automatic in metadata - for a given stream. - - Verify expected unsupported fields are given the inclusion of unsupported in - metadata for a given stream. - - Verify that all fields for a given stream which are not unsupported or automatic - have inclusion of available. 
- - Verify row-count metadata matches expectations for a given stream. - - Verify selected metadata is None for all streams. - - Verify is-view metadata is False for a given stream. - - Verify no forced-replication-method is present in metadata for a given stream. - - Verify schema and db match expectations for a given stream. - - Verify schema types match expectations for a given stream. - """ - # TODO Generate multiple tables (streams) and maybe dbs too? - - # run discovery (check mode) - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # Verify discovery generated a catalog - found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0) - - # Verify discovery generated the expected catalogs by name - found_catalog_names = {catalog['stream_name'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # Verify that the table_name is in the format for each stream - found_catalog_stream_ids = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_stream_ids(), found_catalog_stream_ids) - - # Test by stream - for stream in self.expected_check_streams(): - with self.subTest(stream=stream): - - # Verify the caatalog is found for a given stream - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) - self.assertTrue(isinstance(catalog, dict)) - - # collecting expected values - expected_primary_keys = self.expected_primary_keys()[stream] - expected_replication_keys = set() - expected_unsupported_fields = self.expected_unsupported_fields() - expected_fields_to_datatypes = self.expected_schema_types() - expected_row_count = len(self.recs) - - # collecting actual values... - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) - stream_metadata = schema_and_metadata["metadata"] - top_level_metadata = [item for item in stream_metadata if item.get("breadcrumb") == []] - stream_properties = top_level_metadata[0]['metadata'] - actual_primary_keys = set(stream_properties.get(self.PRIMARY_KEYS, [])) - actual_replication_keys = set(stream_properties.get(self.REPLICATION_KEYS, [])) - actual_replication_method = stream_properties.get(self.REPLICATION_METHOD) - actual_automatic_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "automatic" - ) - actual_unsupported_fields = set( - item.get("breadcrumb", ["properties", None])[1] for item in stream_metadata - if item.get("metadata").get("inclusion") == "unsupported" - ) - actual_fields_to_datatypes = { - item['breadcrumb'][1]: item['metadata'].get('sql-datatype') - for item in stream_metadata[1:] - } - - # Verify there is only 1 top level breadcrumb in metadata - self.assertEqual(1, len(top_level_metadata)) - - # Verify replication key(s) match expectations - self.assertSetEqual( - expected_replication_keys, actual_replication_keys - ) - - # NB | We expect primary keys and replication keys to have inclusion automatic for - # key-based incremental replication. But that is only true for primary keys here. - # This BUG should not be carried over into hp-postgres, but will not be fixed for this tap. 
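# Illustrative sketch (sample entries are made up, only their shape is taken from this
# test): how field-level inclusion falls out of the breadcrumb-keyed metadata that the
# comprehensions above iterate over.
sample_metadata = [
    {"breadcrumb": [], "metadata": {"table-key-properties": ["id"], "row-count": 500}},
    {"breadcrumb": ["properties", "id"], "metadata": {"inclusion": "automatic", "sql-datatype": "integer"}},
    {"breadcrumb": ["properties", "our_text"], "metadata": {"inclusion": "available", "sql-datatype": "text"}},
    {"breadcrumb": ["properties", "invalid_xml"], "metadata": {"inclusion": "unsupported", "sql-datatype": "xml"}},
]
automatic = {item["breadcrumb"][1] for item in sample_metadata
             if item["breadcrumb"] and item["metadata"].get("inclusion") == "automatic"}
unsupported = {item["breadcrumb"][1] for item in sample_metadata
               if item["breadcrumb"] and item["metadata"].get("inclusion") == "unsupported"}
assert automatic == {"id"}            # the primary key is the only automatic field here
assert unsupported == {"invalid_xml"}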
- - # Verify primary key(s) match expectations - self.assertSetEqual( - expected_primary_keys, actual_primary_keys, - ) - - # Verify the replication method matches our expectations - self.assertIsNone(actual_replication_method) - - # Verify that only primary keys - # are given the inclusion of automatic in metadata. - self.assertSetEqual(expected_primary_keys, actual_automatic_fields) - - - # DOCS_BUG_1 | https://stitchdata.atlassian.net/browse/DOC-1643 - # The following types were converted and selected, but docs say unsupported. - # Still need to investigate how the tap handles values of these datatypes - # during sync. - KNOWN_MISSING = { - 'invalid_bigserial', # BIGSERIAL -> bigint - 'invalid_serial', # SERIAL -> integer - 'invalid_smallserial', # SMALLSERIAL -> smallint - } - # Verify expected unsupported fields - # are given the inclusion of unsupported in metadata. - self.assertSetEqual(expected_unsupported_fields, actual_unsupported_fields | KNOWN_MISSING) - - - # Verify that all other fields have inclusion of available - # This assumes there are no unsupported fields for SaaS sources - self.assertTrue( - all({item.get("metadata").get("inclusion") == "available" - for item in stream_metadata - if item.get("breadcrumb", []) != [] - and item.get("breadcrumb", ["properties", None])[1] - not in actual_automatic_fields - and item.get("breadcrumb", ["properties", None])[1] - not in actual_unsupported_fields}), - msg="Not all non key properties are set to available in metadata") - - # Verify row-count metadata matches expectations - self.assertEqual(expected_row_count, stream_properties['row-count']) - - # Verify selected metadata is None for all streams - self.assertNotIn('selected', stream_properties.keys()) - - # Verify is-view metadata is False - self.assertFalse(stream_properties['is-view']) - - # Verify no forced-replication-method is present in metadata - self.assertNotIn(self.REPLICATION_METHOD, stream_properties.keys()) - - # Verify schema and db match expectations - self.assertEqual(test_schema_name, stream_properties['schema-name']) - self.assertEqual(test_db, stream_properties['database-name']) - - # Verify schema types match expectations - self.assertDictEqual(expected_fields_to_datatypes, actual_fields_to_datatypes) - -SCENARIOS.add(PostgresDiscovery) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import SCENARIOS -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -test_schema_name = "public" -test_table_name = "postgres_drop_table_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -class PostgresDropTable(unittest.TestCase): - - @staticmethod - def name(): - return "tap_tester_postgres_drop_table_field_selection" - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'discovery0' - } - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def expected_check_streams(): - 
return { 'discovery0-public-postgres_drop_table_test'} - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('discovery0') - - with db_utils.get_test_connection('discovery0') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - #pylint: disable=line-too-long - create_table_sql = 'CREATE TABLE {} (id SERIAL PRIMARY KEY)'.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # Run discovery - check_job_name = runner.run_check_mode(self, conn_id) - - # Verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # There should not be any tables in this database - with db_utils.get_test_connection('discovery0') as conn: - cur = conn.cursor() - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - # Run discovery again - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - - # When discovery mode finds 0 tables, the tap returns an error - self.assertEqual(exit_status['discovery_exit_status'], 1) - - - - -SCENARIOS.add(PostgresDropTable) -import datetime -import decimal -import json -import os -import unittest -import uuid - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": 
"#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'}, 'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : '#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}} , - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'}, 'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'}, 'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'}, 'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'}, 'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}, 'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -def 
canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresFullTableRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - 
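# Illustrative sketch of the SQL that the insert_record() helper defined above builds
# for array columns (quoting is simplified here; the helper uses quote_ident with a
# live cursor, and only the json/jsonb array columns get an explicit cast).
columns = sorted(["our_int_array", "our_json_array", "our_text_array"])
placeholders = ["%s::json[]" if c == "our_json_array"
                else "%s::jsonb[]" if c == "our_jsonb_array"
                else "%s"
                for c in columns]
insert_sql = 'INSERT INTO "{}" ( {} ) VALUES ( {} )'.format(
    "postgres_full_table_replication_array_test",
    ", ".join(columns),
    ",".join(placeholders))
# -> INSERT INTO "postgres_full_table_replication_array_test"
#    ( our_int_array, our_json_array, our_text_array ) VALUES ( %s,%s::json[],%s )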
self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - print("inserting a record") - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(3, len(records_by_stream[test_table_name]['messages'])) - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'upsert') - 
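# For reference (illustrative, values taken from the insert and assertions in this
# test): a few of the Postgres array literals in self.rec_1 and the nested values the
# target is expected to receive for them, as checked against expected_inserted_record
# below:
#   '{{0,1,1}}'          BIT(1)[]     -> [[False, True, True]]
#   '{{$412.1234}}'      MONEY[]      -> [['$412.12']]            (rounded to cents)
#   '{{1,2,3},{4,5,6}}'  INTEGER[][]  -> [[1, 2, 3], [4, 5, 6]]
#   '{1998-03-04}'       DATE[]       -> ['1998-03-04T00:00:00+00:00']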
self.assertEqual(records_by_stream[test_table_name]['messages'][2]['action'], - 'activate_version') - actual_record_1 = records_by_stream[test_table_name]['messages'][1]['data'] - - expected_inserted_record = {'id': 1, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - print("inserted record is correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNone(bookmark.get('lsn'), - msg="expected bookmark for stream to have NO lsn because we are using full-table replication") - -SCENARIOS.add(PostgresFullTableRepArrays) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import psycopg2.extras -from psycopg2.extensions import quote_ident -import pytz -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -NUMERIC_SCALE=2 -NUMERIC_PRECISION=12 - -expected_schemas = {'postgres_full_table_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties': {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal(str(10 ** (0 - NUMERIC_SCALE))), 'type': ['null', 'number'], - 'maximum': 10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMinimum': True, - 'minimum': -10 ** (NUMERIC_PRECISION - NUMERIC_SCALE), 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807,'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -test_schema_name = "public" -test_table_name = "postgres_full_table_replication_test" - - -class PostgresFullTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC({},{}), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canon_table_name, NUMERIC_PRECISION, NUMERIC_SCALE) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - # record 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = 
datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps({'burgers' : 'good'}), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal 4', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'good', - 'our_money': '100.1122', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good"}', - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_alignment_enum' : self.inserted_records[0]['our_alignment_enum'], - 'our_money' : '$100.11' - }) - # record 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps(["nymn 77"]), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': 
decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '["nymn 77"]', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_alignment_enum' : None, - 'our_money': None - }) - # record 3 - self.inserted_records.append({ - 'our_decimal' : decimal.Decimal('NaN'), - 'our_double' : float('nan'), - 'our_real' : float('-inf') - }) - self.expected_records.append({ - 'id': 3, - # We cast NaN's, +Inf, -Inf to NULL as wal2json does not support - # them and now we are at least consistent(ly wrong). - 'our_decimal' : None, - 'our_double' : None, - 'our_real' : None, - # any field without a set value will be set to NULL - 'OUR TIME': None, - 'our_text': None, - 'our_bit': None, - 'our_integer': None, - 'our_json': None, - 'our_boolean': None, - 'our_jsonb': None, - 'our_bigint': None, - 'OUR TIME TZ': None, - 'our_store': None, - 'OUR TS TZ': None, - 'our_smallint': None, - 'OUR DATE': None, - 'our_varchar': None, - 'OUR TS': None, - 'our_uuid': None, - 'our_varchar_10': None, - 'our_citext': None, - 'our_inet': None, - 'our_cidr': None, - 'our_mac': None, - 'our_alignment_enum': None, - 'our_money': None - }) - - for record in self.inserted_records: - db_utils.insert_record(cur, test_table_name, record) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_full_table_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_full_table_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_full_table_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_full_table_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'frequency_in_minutes': '1', - # 'default_replication_method' : 'LOG_BASED', - 'filter_dbs' : 'postgres,dev', - # 'ssl' : 'true', # TODO: Disabling for docker-based container - 'itersize' : '10' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - 
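# Illustrative check of the expected_ts()/expected_ts_tz() helpers defined above, using
# one of the fixture timestamps (America/New_York is UTC-05:00 on this date, so the
# tz-aware value shifts by five hours once normalized to UTC).
import datetime
import pytz

nyc_tz = pytz.timezone('America/New_York')
our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184)
our_ts_tz = nyc_tz.localize(our_ts)
assert datetime.datetime.strftime(
    our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") == '1997-02-02T02:02:02.722184+00:00'
assert datetime.datetime.strftime(
    our_ts_tz.astimezone(pytz.utc), "%Y-%m-%dT%H:%M:%S.%f+00:00") == '1997-02-02T07:02:02.722184+00:00'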
menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'FULL_TABLE'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_1 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_1, bookmark['version']) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN and get the same 3 records - #---------------------------------------------------------------------- - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, 
sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_2 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('activate_version', messages[3]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_2, table_version_1) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[0], messages[0]['data']) - self.assertDictEqual(self.expected_records[1], messages[1]['data']) - self.assertDictEqual(self.expected_records[2], messages[2]['data']) - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_2, bookmark['version']) - - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record - # Insert a record to be updated prior to sync - # Insert a record to be deleted prior to sync (NOT REPLICATED) - - # Update an existing record - # Update a newly inserted record - - # Delete an existing record - # Delete a newly inserted record - - # inserting... 
- # a new record - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99', - 'our_alignment_enum': None, - }) - # a new record which we will then update prior to sync - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : 
'08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - # a new record to be deleted prior to sync - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None, - 'our_alignment_enum': None, - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - - # updating ... 
- # an existing record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # a newly inserted record - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 5 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[4]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[4]["our_double"] = decimal.Decimal("6.6") - self.expected_records[4]["our_money"] = "$0.00" - - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - - # deleting - # an existing record - record_pk = 2 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # a newly inserted record - record_pk = 6 - db_utils.delete_record(cur, canon_table_name, record_pk) - - #---------------------------------------------------------------------- - # invoke the sync job AGAIN after vairous manipulations - #---------------------------------------------------------------------- - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_pks() - ) - records_by_stream = runner.get_records_from_target_output() - table_version_3 = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(4, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(5, len(messages)) - self.assertEqual('upsert', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - self.assertEqual('activate_version', messages[4]['action']) - - # verify the new table version increased on the second sync - self.assertGreater(table_version_3, table_version_2) - - # verify the persisted schema still matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - - # NB | This is a little tough to track mentally so here's a breakdown of - # the order of operations by expected records indexes: - - # Prior to Sync 1 - # insert 0, 1, 2 - - # Prior to Sync 2 - # No db changes - - # Prior to Sync 3 - # insert 3, 4, 5 - # update 0, 4 - # delete 1, 5 - - # Resulting Synced Records: 2, 3, 0, 4 - - - # verify replicated records still match expectations - self.assertDictEqual(self.expected_records[2], messages[0]['data']) # existing insert - self.assertDictEqual(self.expected_records[3], messages[1]['data']) # new insert - 
self.assertDictEqual(self.expected_records[0], messages[2]['data']) # existing update - self.assertDictEqual(self.expected_records[4], messages[3]['data']) # new insert / update - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_full_table_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertIsNone(bookmark.get('replication_key')) - self.assertIsNone(bookmark.get('replication_key_value')) - self.assertEqual(table_version_3, bookmark['version']) - - -SCENARIOS.add(PostgresFullTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_incremental_replication_test" -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_cidr': {'type': ['null', 
'string']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']} - }}} - -class PostgresIncrementalTable(unittest.TestCase): - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_inet inet, - our_cidr cidr, - our_mac macaddr, - our_money money) - """.format(db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name)) - - cur.execute(create_table_sql) - - # insert fixture data and track expected records - self.inserted_records = [] - self.expected_records = [] - - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - - # record 1 - our_ts = datetime.datetime(1977, 3, 3, 3, 3, 3, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('9876543210.02'), - 'OUR TIME': str(our_time), - 'our_text': 'some text 2', - 'our_bit': True, - 'our_integer': 44101, - 'our_double': decimal.Decimal('1.1'), - 'id': 1, - 'our_json': '{"nymn": 77}', - 'our_boolean': True, - 'our_jsonb': '{"burgers": "good++"}', - 'our_bigint': 1000001, - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'our_smallint': 2, - 'OUR DATE': '1964-07-01T00:00:00+00:00', - 'our_varchar': 'our_varchar 2', - 'OUR TS': self.expected_ts(our_ts), - 'our_uuid': self.inserted_records[0]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext' : 
self.inserted_records[0]['our_citext'], - 'our_inet' : self.inserted_records[0]['our_inet'], - 'our_cidr' : self.inserted_records[0]['our_cidr'], - 'our_mac' : self.inserted_records[0]['our_mac'], - 'our_money' : None - }) - # record 2 - our_ts = datetime.datetime(1987, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 2, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[1]['our_citext'], - 'our_inet' : self.inserted_records[1]['our_inet'], - 'our_cidr' : self.inserted_records[1]['our_cidr'], - 'our_mac' : self.inserted_records[1]['our_mac'], - 'our_money' : '$1,445.57' - }) - # record 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", 'our_text' : - "some text", 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : '1.1', - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(6777777), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'cyclops 1', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_money' : '$1,445.5678' - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567890.01'), - 'our_text': 'some text', - 'our_bit': False, - 'our_integer': 44100, - 'our_double': decimal.Decimal('1.1'), - 'id': 3, - 'our_json': '{"secret": 55}', - 'our_boolean': True, - 'our_jsonb': self.inserted_records[1]['our_jsonb'], - 'our_bigint': 1000000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_varchar': 'our_varchar', - 'our_uuid': self.inserted_records[2]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_10', - 'our_citext': self.inserted_records[2]['our_citext'], - 'our_inet' : self.inserted_records[2]['our_inet'], - 'our_cidr' : self.inserted_records[2]['our_cidr'], - 'our_mac' : self.inserted_records[2]['our_mac'], - 'our_money' : '$1,445.57' - }) - - for rec in self.inserted_records: - db_utils.insert_record(cur, test_table_name, rec) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_incremental_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_incremental_replication_test' } - - @staticmethod - def expected_replication_keys(): - return { - 'postgres_incremental_replication_test' : {'OUR TS TZ'} - } - @staticmethod - def expected_primary_keys(): - return { - 'postgres_incremental_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_incremental_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED' - } - - @staticmethod - def expected_ts_tz(our_ts_tz): - our_ts_tz_utc = our_ts_tz.astimezone(pytz.utc) - expected_value = datetime.datetime.strftime(our_ts_tz_utc, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - @staticmethod - def expected_ts(our_ts): - expected_value = datetime.datetime.strftime(our_ts, "%Y-%m-%dT%H:%M:%S.%f+00:00") - - return expected_value - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode and verify exit codes - check_job_name = runner.run_check_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify basics of discovery are consistent with expectations... 
- - # verify discovery produced (at least) 1 expected catalog - found_catalogs = [found_catalog for found_catalog in menagerie.get_catalogs(conn_id) - if found_catalog['tap_stream_id'] in self.expected_check_streams()] - self.assertGreaterEqual(len(found_catalogs), 1) - - # verify the tap discovered the expected streams - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - self.assertSetEqual(self.expected_check_streams(), found_catalog_names) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - self.assertEqual(test_table_name, test_catalog['stream_name']) - print("discovered streams are correct") - - # perform table selection - print('selecting {} and all fields within the table'.format(test_table_name)) - schema_and_metadata = menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']) - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'INCREMENTAL', 'replication-key' : 'OUR TS TZ'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, schema_and_metadata, additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - # run sync job 1 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - table_version = records_by_stream[test_table_name]['table_version'] - messages = records_by_stream[test_table_name]['messages'] - - # verify the execpted number of records were replicated - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual(4, len(messages)) - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records match expectations - self.assertDictEqual(self.expected_records[0], messages[1]['data']) - self.assertDictEqual(self.expected_records[1], messages[2]['data']) - self.assertDictEqual(self.expected_records[2], messages[3]['data']) - - # verify records are in ascending order by replication-key value - expected_replication_key = list(self.expected_replication_keys()[test_table_name])[0] - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # grab bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify state and bookmarks meet expectations - self.assertIsNone(state['currently_syncing']) - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(table_version, bookmark['version']) - self.assertEqual(expected_replication_key, bookmark['replication_key']) - self.assertEqual(self.expected_records[2][expected_replication_key], bookmark['replication_key_value']) - - 
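#   The assertions above establish the sync-1 bookmark for this INCREMENTAL
#   stream: replication_key = "OUR TS TZ" and replication_key_value = the
#   greatest value replicated. The data manipulations that follow are judged
#   against that bookmark. Roughly the query shape these expectations imply
#   (an illustration only, not tap-postgres's actual SQL): rows are selected
#   where the replication key is >= the bookmarked value and emitted in
#   ascending replication-key order, which is why the previously bookmarked
#   record is re-emitted on the next sync while lower-keyed inserts and
#   updates are skipped.
#
#       SELECT * FROM "public"."postgres_incremental_replication_test"
#        WHERE "OUR TS TZ" >= %s        -- %s = bookmarked replication_key_value
#        ORDER BY "OUR TS TZ" ASC;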
#---------------------------------------------------------------------- - # invoke the sync job AGAIN following various manipulations to the data - #---------------------------------------------------------------------- - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # NB | We will perform the following actions prior to the next sync: - # [Action (EXPECTED RESULT)] - - # Insert a record with a lower replication-key value (NOT REPLICATED) - # Insert a record with a higher replication-key value (REPLICATED) - - # Insert a record with a higher replication-key value and... - # Delete it (NOT REPLICATED) - - # Update a record with a higher replication-key value (REPLICATED) - # Update a record with a lower replication-key value (NOT REPLICATED) - - - # inserting... - # a record with a replication-key value that is lower than the previous bookmark - nyc_tz = pytz.timezone('America/New_York') - our_time_offset = "-04:00" - our_ts = datetime.datetime(1996, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(6,6,6) - our_time_tz = our_time.isoformat() + our_time_offset - our_date = datetime.date(1970, 7, 1) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_citext' : 'cyclops 2', - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': '$0.98789' - }) - self.expected_records.append({ - 'id': 4, - 'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - 'OUR TS' : self.expected_ts(our_ts), - 'OUR TS TZ' : self.expected_ts_tz(our_ts_tz), - 'OUR TIME' : str(our_time), - 'OUR TIME TZ' : str(our_time_tz), - 'OUR DATE' : '1970-07-01T00:00:00+00:00', - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : True, - 'our_json': '{"nymn": 77}', - 'our_jsonb': '{"burgers": "good++"}', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_citext': self.inserted_records[-1]['our_citext'], - 'our_store': {"name" : "betty", "dances" :"floor"}, - 'our_cidr': self.inserted_records[-1]['our_cidr'], - 'our_inet': self.inserted_records[-1]['our_inet'], - 'our_mac': self.inserted_records[-1]['our_mac'], - 'our_money': '$0.99' - }) - # a record with a replication-key value that is higher than the previous bookmark - our_ts = datetime.datetime(2007, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - 
our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 5, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - # a record with a replication-key value that is higher than the previous bookmark (to be deleted) - our_ts = datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1999, 9, 9) - my_uuid = str(uuid.uuid1()) - self.inserted_records.append({ - 'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_3", - 'our_text' : "some text 4", - 'our_integer' : 55200, - 'our_smallint' : 1, - 'our_bigint' : 100000, - 'our_decimal' : decimal.Decimal('1234567899.99'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : decimal.Decimal('1.1'), - 'our_real' : decimal.Decimal('1.2'), - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps('some string'), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money': None, - }) - self.expected_records.append({ - 'our_decimal': decimal.Decimal('1234567899.99'), - 'our_text': 'some text 4', - 'our_bit': False, - 'our_integer': 55200, - 'our_double': decimal.Decimal('1.1'), - 'id': 6, - 'our_json': self.inserted_records[-1]['our_json'], - 'our_boolean': True, - 
'our_jsonb': self.inserted_records[-1]['our_jsonb'], - 'our_bigint': 100000, - 'OUR TS': self.expected_ts(our_ts), - 'OUR TS TZ': self.expected_ts_tz(our_ts_tz), - 'OUR TIME': str(our_time), - 'OUR TIME TZ': str(our_time_tz), - 'our_store': {"name" : "betty", "size" :"small"}, - 'our_smallint': 1, - 'OUR DATE': '1999-09-09T00:00:00+00:00', - 'our_varchar': 'our_varchar 4', - 'our_uuid': self.inserted_records[-1]['our_uuid'], - 'our_real': decimal.Decimal('1.2'), - 'our_varchar_10': 'varchar_3', - 'our_citext' : 'cyclops 3', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - 'our_money' : None - }) - - db_utils.insert_record(cur, test_table_name, self.inserted_records[3]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[4]) - db_utils.insert_record(cur, test_table_name, self.inserted_records[5]) - - # update a record with a replication-key value that is higher than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 1 - our_ts = datetime.datetime(2021, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[0]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[0]["our_double"] = decimal.Decimal("6.6") - self.expected_records[0]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # update a record with a replication-key value that is lower than the previous bookmark - canon_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) - record_pk = 2 - our_ts = datetime.datetime(1990, 4, 4, 4, 4, 4, 733184) - our_ts_tz = nyc_tz.localize(our_ts) - updated_data = { - "OUR TS TZ": our_ts_tz, - "our_double": decimal.Decimal("6.6"), - "our_money": "$0.00" - } - self.expected_records[1]["OUR TS TZ"] = self.expected_ts_tz(our_ts_tz) - self.expected_records[1]["our_double"] = decimal.Decimal("6.6") - self.expected_records[1]["our_money"] = "$0.00" - db_utils.update_record(cur, canon_table_name, record_pk, updated_data) - - # delete a newly inserted record with a higher replication key than the previous bookmark - record_pk = 5 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 2 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # grab records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id, self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were synced - self.assertEqual(3, record_count_by_stream[test_table_name]) - - # verify the message actions match expectations - self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('upsert', messages[1]['action']) - self.assertEqual('upsert', messages[2]['action']) - self.assertEqual('upsert', messages[3]['action']) - - # verify the persisted schema matches expectations - self.assertEqual(expected_schemas[test_table_name], records_by_stream[test_table_name]['schema']) - - # verify replicated records meet our expectations... 
- - # verify the first record was the bookmarked record from the previous sync - self.assertDictEqual(self.expected_records[2], messages[1]['data']) - - # verify the expected updated record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[0], messages[2]['data']) - - # verify the expected inserted record with a lower replication-key value was NOT replicated - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[3]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the deleted record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[4]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected updated record with a lower replication-key value was NOT replicated - expected_record_id = self.expected_records[1]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify the expected inserted record with a higher replication-key value was replicated - self.assertDictEqual(self.expected_records[5], messages[3]['data']) - - # verify records are in ascending order by replication-key value - self.assertLess(messages[1]['data'][expected_replication_key], messages[2]['data'][expected_replication_key]) - self.assertLess(messages[2]['data'][expected_replication_key], messages[3]['data'][expected_replication_key]) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - #--------------------------------------------------------------------- - # run sync AGAIN after deleting a record and get 1 record (prev bookmark) - #---------------------------------------------------------------------- - - # Delete a pre-existing record from the database - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # delete a record with a lower replication key than the previous sync - record_pk = 1 - db_utils.delete_record(cur, canon_table_name, record_pk) - - # run sync job 3 and verify exit codes - sync_job_name = runner.run_sync_mode(self, conn_id) - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - # get records - record_count_by_stream = runner.examine_target_output_file( - self, conn_id,self.expected_sync_streams(), self.expected_primary_keys() - ) - records_by_stream = runner.get_records_from_target_output() - messages = records_by_stream[test_table_name]['messages'] - - # verify the expected number of records were replicated - self.assertEqual(1, record_count_by_stream[test_table_name]) - - # verify messages match our expectations - self.assertEqual(2, len(messages)) - self.assertEqual(messages[0]['action'], 'activate_version') - self.assertEqual(messages[1]['action'], 'upsert') - self.assertEqual(records_by_stream[test_table_name]['table_version'], table_version) - - # verify replicated records meet our expectations... 
- - # verify we did not re-replicate the deleted record - actual_record_ids = [message['data']['id'] for message in messages[1:]] - expected_record_id = self.expected_records[0]['id'] - self.assertNotIn(expected_record_id, actual_record_ids) - - # verify only the previously bookmarked record was synced - self.assertDictEqual(self.expected_records[5], messages[1]['data']) - - print("records are correct") - - # get bookmarked state - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_incremental_replication_test'] - - # verify the bookmarked state matches our expectations - self.assertIsNone(bookmark.get('lsn')) - self.assertEqual(bookmark['version'], table_version) - self.assertEqual(bookmark['replication_key'], expected_replication_key) - self.assertEqual(bookmark['replication_key_value'], self.expected_records[5][expected_replication_key]) - - -SCENARIOS.add(PostgresIncrementalTable) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_array_test" - - -MAX_SCALE = 38 -MAX_PRECISION = 100 -expected_schemas = {test_table_name: - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}}, - "sdc_recursive_decimal_12_2_array": {"exclusiveMaximum": True, - "exclusiveMinimum": True, - "type": ['null', "number", "array"], - "items": { - "$ref": "#/definitions/sdc_recursive_decimal_12_2_array" - }, - "minimum": -10000000000, - "multipleOf": decimal.Decimal('0.01'), - "maximum": 10000000000}}, - 'type': 'object', - 'properties': {'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_bit_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_boolean_array': {'items': { '$ref' : '#/definitions/sdc_recursive_boolean_array'},'type': ['null', 'array']}, - 'our_cidr_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_citext_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_date_array': {'items':{ '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_decimal_array' : {'type': ['null', 'array'], 'items': {'$ref' : 
'#/definitions/sdc_recursive_decimal_12_2_array'}}, - 'our_double_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_enum_array': {'type': ['null', 'array'], 'items': { '$ref' : '#/definitions/sdc_recursive_string_array'}}, - 'our_float_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_hstore_array': {'items': { '$ref' : '#/definitions/sdc_recursive_object_array'},'type': ['null', 'array']}, - 'our_inet_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_int_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_int8_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_json_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_jsonb_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_mac_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_money_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_real_array': {'items': { '$ref' : '#/definitions/sdc_recursive_number_array'},'type': ['null', 'array']}, - 'our_smallint_array': {'items': { '$ref' : '#/definitions/sdc_recursive_integer_array'},'type': ['null', 'array']}, - 'our_string_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_text_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_time_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}, - 'our_ts_tz_array': {'items': { '$ref' : '#/definitions/sdc_recursive_timestamp_array'},'type': ['null', 'array']}, - 'our_uuid_array': {'items': { '$ref' : '#/definitions/sdc_recursive_string_array'},'type': ['null', 'array']}} - }} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql_array = [] - for k in our_keys: - if k == 'our_json_array': - value_sql_array.append("%s::json[]") - elif k == 'our_jsonb_array': - value_sql_array.append("%s::jsonb[]") - else: - value_sql_array.append("%s") - - value_sql = ",".join(value_sql_array) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepArrays(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = 
cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_bit_array BIT(1)[], - our_boolean_array BOOLEAN[], - our_cidr_array CIDR[], - our_citext_array CITEXT[], - our_date_array DATE[], - our_decimal_array NUMERIC(12,2)[], - our_double_array DOUBLE PRECISION[], - our_enum_array ALIGNMENT[], - our_float_array FLOAT[], - our_hstore_array HSTORE[], - our_inet_array INET[], - our_int_array INTEGER[][], - our_int8_array INT8[], - our_json_array JSON[], - our_jsonb_array JSONB[], - our_mac_array MACADDR[], - our_money_array MONEY[], - our_real_array REAL[], - our_smallint_array SMALLINT[], - our_string_array VARCHAR[], - our_text_array TEXT[], - our_time_array TIME[], - our_ts_tz_array TIMESTAMP WITH TIME ZONE[], - our_uuid_array UUID[]) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_array_test'} - - @staticmethod - def expected_sync_streams(): - return { test_table_name } - - @staticmethod - def expected_pks(): - return { - test_table_name : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_arrays" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = {catalog['tap_stream_id'] for catalog in found_catalogs} - diff = 
self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual(test_table_name, test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { test_table_name: 0}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream[test_table_name]['table_version'] - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream[test_table_name]['messages'][1]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record") - - our_ts_tz = None - our_date = None - our_uuid = str(uuid.uuid1()) - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 2 - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_date = datetime.date(1998, 3, 4) - - self.rec_1 = { - 'our_bit_array' : '{{0,1,1}}', - 'our_boolean_array' : '{true}', - 'our_cidr_array' : '{{192.168.100.128/25}}', - 'our_citext_array' : '{{maGICKal 2}}', - 'our_date_array' : '{{{}}}'.format(our_date), - 'our_decimal_array' : '{{{}}}'.format(decimal.Decimal('1234567890.01')), - 'our_double_array' : '{{1.232323}}', - 'our_enum_array' : '{{bad}}', - 'our_float_array' : '{{5.23}}', - 'our_hstore_array' : """{{"size=>small","name=>betty"}}""", - 'our_inet_array' : '{{192.168.100.128/24}}', - 'our_int_array' : '{{1,2,3},{4,5,6}}', - 'our_int8_array' : '{16,32,64}', - 'our_json_array' : [psycopg2.extras.Json({'secret' : 55})], - 'our_jsonb_array' : [psycopg2.extras.Json({'secret' : 69})], - 'our_mac_array' : '{{08:00:2b:01:02:03}}', - 'our_money_array' : '{{$412.1234}}', - 'our_real_array' : '{{76.33}}', - 'our_smallint_array' : '{{10,20,30},{40,50,60}}', - 'our_string_array' : '{{one string, two strings}}', - 
'our_text_array' : '{{three string, four}}', - 'our_time_array' : '{{03:04:05}}', - 'our_ts_tz_array' : '{{{}}}'.format(our_ts_tz), - 'our_uuid_array' : '{{{}}}'.format(our_uuid)} - - - insert_record(cur, test_table_name, self.rec_1) - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { test_table_name: 1 }) - records_by_stream = runner.get_records_from_target_output() - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream[test_table_name]['messages'])) - actual_record_1 = records_by_stream[test_table_name]['messages'][0]['data'] - - expected_inserted_record = {'id': 1, - '_sdc_deleted_at': None, - 'our_bit_array' : [[False, True, True]], - 'our_boolean_array' : [True], - 'our_cidr_array' : [['192.168.100.128/25']], - 'our_citext_array' : [['maGICKal 2']], - 'our_date_array' : ['1998-03-04T00:00:00+00:00'], - 'our_decimal_array' : [decimal.Decimal('1234567890.01')], - 'our_double_array' : [[decimal.Decimal('1.232323')]], - 'our_enum_array' : [['bad']], - 'our_float_array' : [[decimal.Decimal('5.23')]], - 'our_hstore_array' : [[{'size' : 'small' }, {'name' : 'betty'} ]], - 'our_inet_array' : [['192.168.100.128/24']], - 'our_int_array' : [[1,2,3],[4,5,6]], - 'our_int8_array' : [16,32,64], - 'our_json_array' : [json.dumps({'secret' : 55})], - 'our_jsonb_array' : [json.dumps({'secret' : 69})], - 'our_mac_array' : [['08:00:2b:01:02:03']], - 'our_money_array' : [['$412.12']], - 'our_real_array' : [[decimal.Decimal('76.33')]], - 'our_smallint_array' : [[10,20,30],[40,50,60]], - 'our_string_array' : [['one string', 'two strings']], - 'our_text_array' : [['three string', 'four']], - 'our_time_array' : [['03:04:05']], - 'our_ts_tz_array' : ['1997-02-02T07:02:02.722184+00:00'], - 'our_uuid_array' : ['{}'.format(our_uuid)] - - } - - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k in actual_record_1.keys(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream[test_table_name]['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_array_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream 
public-postgres_logical_replication_test to match version") - - -SCENARIOS.add(PostgresLogicalRepArrays) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleDBs(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_dev') """) - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch_postgres') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - db_utils.ensure_db('postgres') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_dev') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_dev") - cur2.create_replication_slot('stitch_dev', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_cows]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur))) - - #create dev_cows - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - - with db_utils.get_test_connection('postgres') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch_postgres') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('postgres', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch_postgres") - 
cur2.create_replication_slot('stitch_postgres', output_plugin='wal2json') - - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name_chickens]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur))) - - - #create postgres_chickens - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'postgres-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'public_postgres_logical_replication_test_cows', 'public_postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'public_postgres_logical_replication_test_cows' : {'id'}, - 'public_postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_dbs" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'include_schemas_in_destination_stream_name' : 'true', - 'debug_lsn': 'true', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ 
"breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - #run sync job - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['public_postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['public_postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['public_postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 1 more cows and 1 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - - with db_utils.get_test_connection('postgres') as conn: - 
conn.autocommit = True - with conn.cursor() as cur: - #insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'public_postgres_logical_replication_test_cows': 1, 'public_postgres_logical_replication_test_chickens': 1}) - - upserts = [] - for u in runner.get_upserts_from_target_output(): - self.assertIsNotNone(u.get('_sdc_lsn')) - del u['_sdc_lsn'] - upserts.append(u) - - self.assertEqual([{'_sdc_deleted_at': None, 'cow_age': 21, 'id': 2, 'cow_name': 'betty cow'}, - {'chicken_name': 'burt chicken', '_sdc_deleted_at': None, 'chicken_age': 14, 'id': 2}], - upserts) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['postgres-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleDBs) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test_cows': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}, - - 'postgres_logical_replication_test_chickens': - {'type': 'object', - 'selected': True, - 'properties': {'cow_name': {'selected': True, 'type': ['null', 'string'], 'inclusion': 'available'}, - 'id': {'maximum': 2147483647, 'inclusion': 'automatic', 'type': ['integer'], 'minimum': -2147483648, 'selected': True}, - 'cow_age': {'selected': True, 'type': ['null', 'integer'], 'inclusion': 'available'}}}} - - -def insert_record(cursor, 
table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name_cows = "postgres_logical_replication_test_cows" -test_table_name_chickens = "postgres_logical_replication_test_chickens" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRepMultipleTables(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - for t in [test_table_name_cows, test_table_name_chickens]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, t]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, t, cur))) - - - cur = conn.cursor() - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - cow_age integer, - cow_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_cows, cur)) - cur.execute(create_table_sql) - - create_table_sql = """ - CREATE TABLE {} (id SERIAL PRIMARY KEY, - chicken_age integer, - chicken_name varchar) - """.format(canonicalized_table_name(test_schema_name, test_table_name_chickens, cur)) - cur.execute(create_table_sql) - - #insert a cow - self.cows_rec_1 = {'cow_name' : "anne_cow", 'cow_age' : 30} - insert_record(cur, test_table_name_cows, self.cows_rec_1) - - #insert a chicken - self.chickens_rec_1 = {'chicken_name' : "alfred_chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chickens_rec_1) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test_cows', 'dev-public-postgres_logical_replication_test_chickens'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test_cows', 'postgres_logical_replication_test_chickens' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test_cows' : {'id'}, - 'postgres_logical_replication_test_chickens' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_multiple_tables" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': 
os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 2, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - - test_catalog_cows = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_cows', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_cows', test_catalog_cows['stream_name']) - - - test_catalog_chickens = list(filter( lambda c: c['stream_name'] == 'postgres_logical_replication_test_chickens', found_catalogs))[0] - self.assertEqual('postgres_logical_replication_test_chickens', test_catalog_chickens['stream_name']) - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_cows, - menagerie.get_annotated_schema(conn_id, test_catalog_cows['stream_id']), - additional_md) - connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog_chickens, - menagerie.get_annotated_schema(conn_id, test_catalog_chickens['stream_id']), - additional_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 1, 'postgres_logical_replication_test_chickens': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version_cows = records_by_stream['postgres_logical_replication_test_cows']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_cows']['messages'][2]['action'], 'activate_version') - - table_version_chickens = records_by_stream['postgres_logical_replication_test_chickens']['table_version'] - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][0]['action'], 
'activate_version') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['postgres_logical_replication_test_chickens']['messages'][2]['action'], 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - bookmark_cows = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(bookmark_cows['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_cows_1 = bookmark_cows['lsn'] - self.assertEqual(bookmark_cows['version'], table_version_cows, msg="expected bookmark for stream to match version") - - bookmark_chickens = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(bookmark_chickens['lsn'], msg="expected bookmark for stream to have an lsn") - lsn_chickens_1 = bookmark_chickens['lsn'] - self.assertEqual(bookmark_chickens['version'], table_version_chickens, msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding records - #---------------------------------------------------------------------- - print("inserting 2 more cows and 2 more chickens") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - # insert another cow - self.cows_rec_2 = {'cow_name' : "betty cow", 'cow_age' : 21} - insert_record(cur, test_table_name_cows, self.cows_rec_2) - # update that cow's expected values - self.cows_rec_2['id'] = 2 - self.cows_rec_2['_sdc_deleted_at'] = None - - # insert another chicken - self.chicken_rec_2 = {'chicken_name' : "burt chicken", 'chicken_age' : 14} - insert_record(cur, test_table_name_chickens, self.chicken_rec_2) - # update that chicken's expected values - self.chicken_rec_2['id'] = 2 - self.chicken_rec_2['_sdc_deleted_at'] = None - - # and repeat... 
- - self.cows_rec_3 = {'cow_name' : "cindy cow", 'cow_age' : 10} - insert_record(cur, test_table_name_cows, self.cows_rec_3) - self.cows_rec_3['id'] = 3 - self.cows_rec_3['_sdc_deleted_at'] = None - - - self.chicken_rec_3 = {'chicken_name' : "carl chicken", 'chicken_age' : 4} - insert_record(cur, test_table_name_chickens, self.chicken_rec_3) - self.chicken_rec_3['id'] = 3 - self.chicken_rec_3['_sdc_deleted_at'] = None - - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test_cows': 2, 'postgres_logical_replication_test_chickens': 2}) - records_by_stream = runner.get_records_from_target_output() - chicken_messages = records_by_stream["postgres_logical_replication_test_chickens"]['messages'] - cow_messages = records_by_stream["postgres_logical_replication_test_cows"]['messages'] - - self.assertDictEqual(self.cows_rec_2, cow_messages[0]['data']) - self.assertDictEqual(self.chicken_rec_2, chicken_messages[0]['data']) - self.assertDictEqual(self.cows_rec_3, cow_messages[1]['data']) - self.assertDictEqual(self.chicken_rec_3, chicken_messages[1]['data']) - - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - cows_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_cows'] - self.assertIsNotNone(cows_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_cows_2 = cows_bookmark['lsn'] - self.assertTrue(lsn_cows_2 >= lsn_cows_1) - - chickens_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test_chickens'] - self.assertIsNotNone(chickens_bookmark['lsn'], msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_chickens_2 = chickens_bookmark['lsn'] - self.assertTrue(lsn_chickens_2 >= lsn_chickens_1) - - #table_version does NOT change - self.assertEqual(chickens_bookmark['version'], table_version_chickens, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #table_version does NOT change - self.assertEqual(cows_bookmark['version'], table_version_cows, msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - -SCENARIOS.add(PostgresLogicalRepMultipleTables) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 
'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db('dev') - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - 
cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 
= {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - #insert fixture data 3 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_3 = {'our_varchar' : "our_varchar 3", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_3) - - #insert fixture data 4 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_4 = {'our_varchar' : "our_varchar 4", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_4) - - - @staticmethod - def expected_check_streams(): - return { 
'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '1' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 4}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][4]['action'], - 
'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][5]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 5") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_5 = {'our_varchar' : "our_varchar 5", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_5) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, 
len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 5', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_5['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 5, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_5['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_5['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_5['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match 
expected schema for stream `{}`.".format(stream)) - - # self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - #the 1st message will be the previous insert - insert_message = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - self.assertEqual(set(insert_message.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(insert_message.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in insert_message.items(): - self.assertEqual(v, expected_inserted_record[k], msg="{} != {} for key {}".format(v, expected_inserted_record[k], k)) - - - #the 2nd message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (SELECT ...)' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (SELECT id FROM {} WHERE id=2)".format(canonicalized_table_name(test_schema_name, test_table_name, cur), canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - print("deleted record is correct") - - state = 
menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_4 = bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after deleting a record using the 'id IN (, )' format - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id IN (4, 5)".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #first record will be the previous delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 2) - - - - #the 2nd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - print("deleted record is correct") - - #the 3rd message will be the more recent delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - print("deleted record is correct") - - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_5 = bookmark['lsn'] - self.assertTrue(lsn_5 >= lsn_4) - - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - - 
#---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 3 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 3) - #first record will be the previous first delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 4) - - #second record will be the previous second delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][1] - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 5) - - #third record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: 
{}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_6 = chicken_bookmark['lsn'] - self.assertTrue(lsn_6 >= lsn_5) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we will get the previous update record again - self.assertEqual(record_count_by_stream, {'postgres_logical_replication_test': 1}) - # TODO the next line is not grabing the record from the latest sync, opening potential for false negatives - update_message = records_by_stream['postgres_logical_replication_test']['messages'][2] - self.assertEqual(update_message['action'], 'upsert') - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_7 = chicken_bookmark['lsn'] - self.assertTrue(lsn_7 >= lsn_6) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import decimal -import unittest -import datetime -import uuid -import json - -import pytz -import psycopg2.extras -from psycopg2.extensions import quote_ident -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = 
{'postgres_logical_replication_test': - {'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }, - 'type': 'object', - 'properties': {'our_boolean': {'type': ['null', 'boolean']}, - '_sdc_deleted_at': {'format': 'date-time', 'type': ['null', 'string']}, - 'OUR TS TZ': {'format' : 'date-time', 'type': ['null', 'string']}, - 'OUR TS': {'format' : 'date-time', 'type': ['null', 'string']}, - 'our_real': {'type': ['null', 'number']}, - 'our_uuid': {'type': ['null', 'string']}, - 'our_store': {'type': ['null', 'object'], 'properties' : {}}, - 'our_smallint': {'maximum': 32767, 'type': ['null', 'integer'], 'minimum': -32768}, - 'our_decimal': {'multipleOf': decimal.Decimal('0.01'), 'type': ['null', 'number'], - 'maximum': 10000000000, 'exclusiveMinimum': True, 'minimum': -10000000000, 'exclusiveMaximum': True}, - 'OUR DATE': {'format': 'date-time', 'type': ['null', 'string']}, - 'our_jsonb': {'type': ['null', 'string']}, - 'our_integer': {'maximum': 2147483647, 'type': ['null', 'integer'], 'minimum': -2147483648}, - 'our_text': {'type': ['null', 'string']}, - 'our_text_2': {'type': ['null', 'string']}, - 'our_json': {'type': ['null', 'string']}, - 'our_double': {'type': ['null', 'number']}, - 'our_varchar': {'type': ['null', 'string']}, - 'our_bigint': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], 'minimum': -9223372036854775808}, - 'id': {'maximum': 2147483647, 'type': ['integer'], 'minimum': -2147483648}, - 'our_varchar_10': {'type': ['null', 'string'], 'maxLength': 10}, - 'OUR TIME': {'type': ['null', 'string']}, - 'OUR TIME TZ': {'type': ['null', 'string']}, - 'our_bit': {'type': ['null', 'boolean']}, - 'our_citext': {'type': ['null', 'string']}, - 'our_cidr': {'type': ['null', 'string']}, - 'our_inet': {'type': ['null', 'string']}, - 'our_mac': {'type': ['null', 'string']}, - 'our_alignment_enum': {'type': ['null', 'string']}, - 'our_money': {'type': ['null', 'string']}}}} - - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - -test_schema_name = "public" -test_table_name = "postgres_logical_replication_test" - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - - -class PostgresLogicalRep(unittest.TestCase): - def tearDown(self): - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with 
conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) - - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db("dev") - - self.maxDiff = None - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute(""" SELECT EXISTS (SELECT 1 - FROM pg_replication_slots - WHERE slot_name = 'stitch') """) - - old_slot = cur.fetchone()[0] - with db_utils.get_test_connection('dev', True) as conn2: - with conn2.cursor() as cur2: - if old_slot: - cur2.drop_replication_slot("stitch") - cur2.create_replication_slot('stitch', output_plugin='wal2json') - - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s);""", - [test_schema_name, test_table_name]) - old_table = cur.fetchone()[0] - - if old_table: - cur.execute("DROP TABLE {}".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - - cur = conn.cursor() - cur.execute(""" SELECT installed_version FROM pg_available_extensions WHERE name = 'hstore' """) - if cur.fetchone()[0] is None: - cur.execute(""" CREATE EXTENSION hstore; """) - - cur.execute(""" CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;""") - cur.execute(""" DROP TYPE IF EXISTS ALIGNMENT CASCADE """) - cur.execute(""" CREATE TYPE ALIGNMENT AS ENUM ('good', 'bad', 'ugly') """) - - - create_table_sql = """ -CREATE TABLE {} (id SERIAL PRIMARY KEY, - our_varchar VARCHAR, - our_varchar_10 VARCHAR(10), - our_text TEXT, - our_text_2 TEXT, - our_integer INTEGER, - our_smallint SMALLINT, - our_bigint BIGINT, - our_decimal NUMERIC(12,2), - "OUR TS" TIMESTAMP WITHOUT TIME ZONE, - "OUR TS TZ" TIMESTAMP WITH TIME ZONE, - "OUR TIME" TIME WITHOUT TIME ZONE, - "OUR TIME TZ" TIME WITH TIME ZONE, - "OUR DATE" DATE, - our_double DOUBLE PRECISION, - our_real REAL, - our_boolean BOOLEAN, - our_bit BIT(1), - our_json JSON, - our_jsonb JSONB, - our_uuid UUID, - our_store HSTORE, - our_citext CITEXT, - our_cidr cidr, - our_inet inet, - our_mac macaddr, - our_alignment_enum ALIGNMENT, - our_money money) - """.format(canonicalized_table_name(test_schema_name, test_table_name, cur)) - - cur.execute(create_table_sql) - - #insert fixture data 1 - our_ts = datetime.datetime(1997, 2, 2, 2, 2, 2, 722184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(12,11,10) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1998, 3, 4) - my_uuid = str(uuid.uuid1()) - - self.rec_1 = {'our_varchar' : "our_varchar", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44100, - 'our_smallint' : 1, 'our_bigint' : 1000000, - 'our_decimal' : decimal.Decimal('1234567890.01'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '0', - 'our_json' : json.dumps({'secret' : 55}), - 'our_jsonb' : json.dumps(['burgers are good']), - 'our_uuid' : my_uuid, - 'our_store' : 'size=>"small",name=>"betty"', - 'our_citext': 'maGICKal', - 'our_cidr' : '192.168.100.128/25', - 'our_inet': '192.168.100.128/24', - 'our_mac' : '08:00:2b:01:02:03', - 
'our_alignment_enum': 'bad'} - - - insert_record(cur, test_table_name, self.rec_1) - - #insert fixture data 2 - our_ts = datetime.datetime(1987, 3, 3, 3, 3, 3, 733184) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(10,9,8) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1964, 7, 1) - my_uuid = str(uuid.uuid1()) - - self.rec_2 = {'our_varchar' : "our_varchar 2", - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text 2", - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 44101, - 'our_smallint' : 2, - 'our_bigint' : 1000001, - 'our_decimal' : decimal.Decimal('9876543210.02'), - quote_ident('OUR TS', cur) : our_ts, - quote_ident('OUR TS TZ', cur) : our_ts_tz, - quote_ident('OUR TIME', cur) : our_time, - quote_ident('OUR TIME TZ', cur) : our_time_tz, - quote_ident('OUR DATE', cur) : our_date, - 'our_double' : 1.1, - 'our_real' : 1.2, - 'our_boolean' : True, - 'our_bit' : '1', - 'our_json' : json.dumps({'nymn' : 77}), - 'our_jsonb' : json.dumps({'burgers' : 'good++'}), - 'our_uuid' : my_uuid, - 'our_store' : 'dances=>"floor",name=>"betty"', - 'our_citext': 'maGICKal 2', - 'our_cidr' : '192.168.101.128/25', - 'our_inet': '192.168.101.128/24', - 'our_mac' : '08:00:2b:01:02:04', - } - - insert_record(cur, test_table_name, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'dev-public-postgres_logical_replication_test'} - - @staticmethod - def expected_sync_streams(): - return { 'postgres_logical_replication_test' } - - @staticmethod - def expected_pks(): - return { - 'postgres_logical_replication_test' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def name(): - return "tap_tester_postgres_logical_replication_v2_message" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'LOG_BASED', - 'logical_poll_total_seconds': '10', - 'wal2json_message_format': '2' - } - - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - - self.assertGreaterEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - test_catalog = found_catalogs[0] - - self.assertEqual('postgres_logical_replication_test', test_catalog['stream_name']) - - print("discovered streams are correct") - - additional_md = [{ "breadcrumb" : [], "metadata" : {'replication-method' : 'LOG_BASED'}}] - #don't selcted our_text_2 - _ = 
connections.select_catalog_and_fields_via_metadata(conn_id, test_catalog, - menagerie.get_annotated_schema(conn_id, test_catalog['stream_id']), - additional_md, - ['our_text_2']) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 2}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['postgres_logical_replication_test']['table_version'] - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], - 'activate_version') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][1]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][2]['action'], - 'upsert') - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][3]['action'], - 'activate_version') - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream to have an lsn") - lsn_1 = bookmark['lsn'] - - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job again after adding a record - #---------------------------------------------------------------------- - print("inserting a record 3") - - with db_utils.get_test_connection('dev') as conn: - conn.autocommit = True - with conn.cursor() as cur: - #insert fixture data 3 - our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333) - nyc_tz = pytz.timezone('America/New_York') - our_ts_tz = nyc_tz.localize(our_ts) - our_time = datetime.time(3,4,5) - our_time_tz = our_time.isoformat() + "-04:00" - our_date = datetime.date(1933, 3, 3) - my_uuid = str(uuid.uuid1()) - - #STRINGS: - #OUR TS: '1993-03-03 03:03:03.333333' - #OUR TS TZ: '1993-03-03 08:03:03.333333+00' - #'OUR TIME': '03:04:05' - #'OUR TIME TZ': '03:04:05+00' - self.rec_3 = {'our_varchar' : "our_varchar 3", # str - 'our_varchar_10' : "varchar13", # str - 'our_text' : "some text 3", #str - 'our_text_2' : "NOT SELECTED", - 'our_integer' : 96000, #int - 'our_smallint' : 3, # int - 'our_bigint' : 3000000, #int - 'our_decimal' : decimal.Decimal('1234567890.03'), #1234567890.03 / our_decimal is a - quote_ident('OUR TS', cur) : our_ts, # str '1993-03-03 03:03:03.333333' - quote_ident('OUR TS TZ', cur) : our_ts_tz, #str '1993-03-03 08:03:03.333333+00' - quote_ident('OUR TIME', cur) : our_time, # str '03:04:05' - quote_ident('OUR TIME TZ', cur) : our_time_tz, # str '03:04:05+00' - quote_ident('OUR DATE', cur) : our_date, #1933-03-03 / OUR DATE is a - 'our_double' : 3.3, #3.3 / our_double is a - 'our_real' : 6.6, #6.6 / our_real is a - 'our_boolean' : True, #boolean - 'our_bit' : '1', #string - 'our_json' : json.dumps({'secret' : 33}), #string - 'our_jsonb' : 
json.dumps(['burgers make me hungry']), - 'our_uuid' : my_uuid, #string - 'our_store' : 'jumps=>"high",name=>"betty"', #string - 'our_citext': 'maGICKal 3', - 'our_cidr' : '192.168.102.128/32', - 'our_inet': '192.168.102.128/32', - 'our_mac' : '08:00:2b:01:02:05', - 'our_money': '$412.1234' - } - - insert_record(cur, test_table_name, self.rec_3) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - self.assertTrue(len(records_by_stream) > 0) - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - self.assertEqual(1, len(records_by_stream['postgres_logical_replication_test']['messages'])) - actual_record_1 = records_by_stream['postgres_logical_replication_test']['messages'][0]['data'] - - expected_inserted_record = {'our_text': 'some text 3', - 'our_real': decimal.Decimal('6.6'), - '_sdc_deleted_at': None, - 'our_store' : {'name' : 'betty', 'jumps' : 'high' }, - 'our_bigint': 3000000, - 'our_varchar': 'our_varchar 3', - 'our_double': decimal.Decimal('3.3'), - 'our_bit': True, - 'our_uuid': self.rec_3['our_uuid'], - 'OUR TS': '1993-03-03T03:03:03.333333+00:00', - 'OUR TS TZ': '1993-03-03T08:03:03.333333+00:00', - 'OUR TIME': '03:04:05', - 'OUR TIME TZ': '03:04:05-04:00', - 'OUR DATE': '1933-03-03T00:00:00+00:00', - 'our_decimal': decimal.Decimal('1234567890.03'), - 'id': 3, - 'our_varchar_10': 'varchar13', - 'our_json': '{"secret": 33}', - 'our_jsonb': self.rec_3['our_jsonb'], - 'our_smallint': 3, - 'our_integer': 96000, - 'our_boolean': True, - 'our_citext': 'maGICKal 3', - 'our_cidr': self.rec_3['our_cidr'], - 'our_inet': '192.168.102.128', - 'our_mac': self.rec_3['our_mac'], - 'our_alignment_enum' : None, - 'our_money' :'$412.12' - } - self.assertEqual(set(actual_record_1.keys()), set(expected_inserted_record.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(actual_record_1.keys()).symmetric_difference(set(expected_inserted_record.keys())))) - - for k,v in actual_record_1.items(): - self.assertEqual(actual_record_1[k], expected_inserted_record[k], msg="{} != {} for key {}".format(actual_record_1[k], expected_inserted_record[k], k)) - - self.assertEqual(records_by_stream['postgres_logical_replication_test']['messages'][0]['action'], 'upsert') - print("inserted record is correct") - - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_2 = chicken_bookmark['lsn'] - - self.assertTrue(lsn_2 >= lsn_1) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - 
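# For reference, the UTC timestamp strings asserted above follow directly from the
# localized fixture values. A minimal sketch of that derivation, assuming pytz as in
# the fixtures above (illustration only, not part of the test):
import datetime

import pytz

nyc_tz = pytz.timezone('America/New_York')
our_ts = datetime.datetime(1993, 3, 3, 3, 3, 3, 333333)

# localize() attaches the America/New_York offset (EST, -05:00 on this date) and
# astimezone() converts to UTC, which is how the naive fixture value
# '1993-03-03 03:03:03.333333' becomes the expected '1993-03-03T08:03:03.333333+00:00'
our_ts_tz = nyc_tz.localize(our_ts)
assert our_ts_tz.astimezone(pytz.utc).isoformat() == '1993-03-03T08:03:03.333333+00:00'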
#---------------------------------------------------------------------- - # invoke the sync job again after deleting a record - #---------------------------------------------------------------------- - print("delete row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("DELETE FROM {} WHERE id = 3".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - #the message will be the delete - delete_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(delete_message['action'], 'upsert') - - sdc_deleted_at = delete_message['data'].get('_sdc_deleted_at') - self.assertIsNotNone(sdc_deleted_at) - self.assertEqual(delete_message['data']['id'], 3) - print("deleted record is correct") - - state = menagerie.get_state(conn_id) - bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - - self.assertIsNotNone(bookmark['lsn'], - msg="expected bookmark for stream ROOT-CHICKEN to have an scn") - - lsn_3 = bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(bookmark['version'], table_version, - msg="expected bookmark for stream postgres_logical_replication_test to match version") - #---------------------------------------------------------------------- - # invoke the sync job again after updating a record - #---------------------------------------------------------------------- - print("updating row from source db") - with db_utils.get_test_connection('dev') as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - cur.execute("UPDATE {} SET our_varchar = 'THIS HAS BEEN UPDATED', our_money = '$56.811', our_decimal = 'NaN', our_real = '+Infinity', our_double = 'NaN' WHERE id = 1".format(canonicalized_table_name(test_schema_name, test_table_name, cur))) - - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, { 'postgres_logical_replication_test': 1 }) - records_by_stream = runner.get_records_from_target_output() - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - 
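# The UPDATE above writes 'NaN' into our_decimal and our_double and '+Infinity' into
# our_real; the expected record below maps all three to None, presumably because JSON
# has no representation for NaN or Infinity. A hypothetical sketch of that
# normalization (illustration only, not a helper used by this suite):
import decimal
import math

def normalize_special_numerics(value):
    # Postgres NUMERIC 'NaN' and float 'NaN'/'+Infinity' cannot be expressed in JSON,
    # so the expectations treat them as nulls.
    if isinstance(value, decimal.Decimal) and value.is_nan():
        return None
    if isinstance(value, float) and (math.isnan(value) or math.isinf(value)):
        return None
    return value

assert normalize_special_numerics(decimal.Decimal('NaN')) is None
assert normalize_special_numerics(float('+inf')) is None
assert normalize_special_numerics(1.2) == 1.2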
- # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - self.assertEqual(len(records_by_stream['postgres_logical_replication_test']['messages']), 1) - - #record will be the new update - update_message = records_by_stream['postgres_logical_replication_test']['messages'][0] - self.assertEqual(update_message['action'], 'upsert') - - expected_updated_rec = {'our_varchar' : 'THIS HAS BEEN UPDATED', - 'id' : 1, - 'our_varchar_10' : "varchar_10", - 'our_text' : "some text", - 'our_integer' : 44100, - 'our_smallint' : 1, - 'our_bigint' : 1000000, - 'our_decimal' : None, - 'OUR TS': '1997-02-02T02:02:02.722184+00:00', - 'OUR TS TZ' : '1997-02-02T07:02:02.722184+00:00', - 'OUR TIME' : '12:11:10', - 'OUR TIME TZ' : '12:11:10-04:00', - 'OUR DATE': '1998-03-04T00:00:00+00:00', - 'our_double' : None, - 'our_real' : None, - 'our_boolean' : True, - 'our_bit' : False, - 'our_json' : '{"secret": 55}', - 'our_jsonb' : self.rec_1['our_jsonb'], - 'our_uuid' : self.rec_1['our_uuid'], - '_sdc_deleted_at' : None, - 'our_store' : {'name' : 'betty', 'size' : 'small' }, - 'our_citext': 'maGICKal', - 'our_cidr': self.rec_1['our_cidr'], - 'our_inet': self.rec_1['our_inet'], - 'our_mac': self.rec_1['our_mac'], - 'our_alignment_enum' : 'bad', - 'our_money' : '$56.81' - } - - self.assertEqual(set(update_message['data'].keys()), set(expected_updated_rec.keys()), - msg="keys for expected_record_1 are wrong: {}".format(set(update_message['data'].keys()).symmetric_difference(set(expected_updated_rec.keys())))) - - - for k,v in update_message['data'].items(): - self.assertEqual(v, expected_updated_rec[k], msg="{} != {} for key {}".format(v, expected_updated_rec[k], k)) - - print("updated record is correct") - - #check state again - state = menagerie.get_state(conn_id) - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_3 = chicken_bookmark['lsn'] - self.assertTrue(lsn_3 >= lsn_2) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - - - #---------------------------------------------------------------------- - # invoke the sync job one last time. 
should only get the PREVIOUS update - #---------------------------------------------------------------------- - sync_job_name = runner.run_sync_mode(self, conn_id) - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - #we should not get any records - self.assertEqual(record_count_by_stream, {}) - - #check state again - state = menagerie.get_state(conn_id) - chicken_bookmark = state['bookmarks']['dev-public-postgres_logical_replication_test'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertIsNotNone(chicken_bookmark['lsn'], - msg="expected bookmark for stream public-postgres_logical_replication_test to have an scn") - lsn_4 = chicken_bookmark['lsn'] - self.assertTrue(lsn_4 >= lsn_3) - - #table_version does NOT change - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream public-postgres_logical_replication_test to match version") - -SCENARIOS.add(PostgresLogicalRep) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = 
"postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsFullTable(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_full_table" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER') - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - 
self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "FULL_TABLE", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - self.assertEqual(records_by_stream['chicken_view']['messages'][2]['action'], 'activate_version') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - - 
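# The metadata check above compares against metadata.to_map(md), which flattens the
# annotated list of breadcrumb/metadata pairs into a dict keyed by breadcrumb tuples.
# A small sketch, assuming singer-python's metadata module behaves as it is used here:
from singer import metadata

raw = [
    {'breadcrumb': [], 'metadata': {'is-view': True, 'schema-name': 'public'}},
    {'breadcrumb': ['properties', 'id'], 'metadata': {'inclusion': 'available'}},
]

mdata = metadata.to_map(raw)
# breadcrumbs become tuple keys, matching the shape the assertions above compare against
assert mdata[()]['is-view'] is True
assert mdata[('properties', 'id')]['inclusion'] == 'available'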
-SCENARIOS.add(PostgresViewsFullTable) -import os -import datetime -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - - -expected_schemas = {'chicken_view': - {'properties': {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'updated_at': {'format': 'date-time', - 'type': ['null', 'string']}}, - 'type': 'object', - 'definitions' : { - 'sdc_recursive_integer_array' : { 'type' : ['null', 'integer', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_integer_array'}}, - 'sdc_recursive_number_array' : { 'type' : ['null', 'number', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_number_array'}}, - 'sdc_recursive_string_array' : { 'type' : ['null', 'string', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_string_array'}}, - 'sdc_recursive_boolean_array' : { 'type' : ['null', 'boolean', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_boolean_array'}}, - 'sdc_recursive_timestamp_array' : { 'type' : ['null', 'string', 'array'], 'format' : 'date-time', 'items' : { '$ref': '#/definitions/sdc_recursive_timestamp_array'}}, - 'sdc_recursive_object_array' : { 'type' : ['null','object', 'array'], 'items' : { '$ref': '#/definitions/sdc_recursive_object_array'}} - }}} - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsIncrementalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - updated_at TIMESTAMP WITH TIME ZONE, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - 
- cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big', 'updated_at' : datetime.datetime(2111, 1, 1, 12, 12, 12, 222111) } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_incremental_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod - def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 
'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'updated_at'): {'selected-by-default': True, 'inclusion': 'available', 'sql-datatype': 'timestamp with time zone'}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties, updated_at is replication_key - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': 'updated_at', "replication-method" : "INCREMENTAL", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - - self.assertEqual(record_count_by_stream, { 'chicken_view': 1}) - records_by_stream = runner.get_records_from_target_output() - - table_version = records_by_stream['chicken_view']['table_version'] - self.assertEqual(2, len(records_by_stream['chicken_view']['messages'])) - self.assertEqual(records_by_stream['chicken_view']['messages'][0]['action'], 'activate_version') - self.assertEqual(records_by_stream['chicken_view']['messages'][1]['action'], 'upsert') - - # verifications about individual records - for stream, recs in records_by_stream.items(): - # verify the persisted schema was correct - self.assertEqual(recs['schema'], - expected_schemas[stream], - msg="Persisted schema did not match expected schema for stream `{}`.".format(stream)) - - actual_chicken_record = records_by_stream['chicken_view']['messages'][1]['data'] - - expected_chicken_record = {'id': 1, 'fk_id': 1, 'name': 'fred', 'age': 99, 'updated_at': '2111-01-01T12:12:12.222111+00:00', 'size' : 'big'} - self.assertEqual(actual_chicken_record, - expected_chicken_record, - msg="Expected `various_types` upsert record data to be {}, but target output {}".format(expected_chicken_record, actual_chicken_record)) - - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - - chicken_bookmark = state['bookmarks']['postgres-public-chicken_view'] - self.assertIsNone(state['currently_syncing'], msg="expected state's currently_syncing to be None") - self.assertEqual(chicken_bookmark['version'], table_version, - msg="expected bookmark for stream ROOT-CHICKEN to match version") - self.assertEqual(chicken_bookmark['replication_key'], 'updated_at') - self.assertEqual(chicken_bookmark['replication_key_value'],'2111-01-01T12:12:12.222111+00:00') - print("bookmarks are correct") - - # TODO Verify expected fields have inclusion of 'automatic' - -SCENARIOS.add(PostgresViewsIncrementalReplication) -import os -import unittest - -import psycopg2.extras -from psycopg2.extensions import quote_ident -from singer import metadata -from tap_tester.scenario import (SCENARIOS) -import tap_tester.connections as connections -import tap_tester.menagerie as menagerie -import tap_tester.runner as runner - -import db_utils # pylint: disable=import-error - -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 
9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} - - -def canonicalized_table_name(schema, table, cur): - return "{}.{}".format(quote_ident(schema, cur), quote_ident(table, cur)) - -def insert_record(cursor, table_name, data): - our_keys = list(data.keys()) - our_keys.sort() - our_values = [data.get(key) for key in our_keys] - - columns_sql = ", \n ".join(our_keys) - value_sql = ",".join(["%s" for i in range(len(our_keys))]) - - insert_sql = """ INSERT INTO {} - ( {} ) - VALUES ( {} )""".format(quote_ident(table_name, cursor), columns_sql, value_sql) - cursor.execute(insert_sql, our_values) - - - -test_schema_name = "public" -test_table_name_1 = "postgres_views_full_table_replication_test" -test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' - -class PostgresViewsLogicalReplication(unittest.TestCase): - def setUp(self): - db_utils.ensure_environment_variables_set() - - db_utils.ensure_db() - - self.maxDiff = None - - with db_utils.get_test_connection() as conn: - conn.autocommit = True - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - for table in [test_table_name_1, test_table_name_2]: - old_table = cur.execute("""SELECT EXISTS ( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = %s - AND table_name = %s)""", - [test_schema_name, table]) - old_table = cur.fetchone()[0] - if old_table: - cur.execute("DROP TABLE {} CASCADE".format(canonicalized_table_name(test_schema_name, table, cur))) - - - cur.execute("""DROP VIEW IF EXISTS {} """.format(quote_ident(test_view, cur))) - cur.execute("""CREATE TABLE {} - (id SERIAL PRIMARY KEY, - name VARCHAR, - size VARCHAR) """.format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - - cur.execute("""CREATE TABLE {} - (fk_id bigint, - age integer) """.format(canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - cur.execute("""CREATE VIEW {} AS - (SELECT * - FROM {} - join {} - on {}.id = {}.fk_id - )""".format(quote_ident(test_view, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur), - canonicalized_table_name(test_schema_name, test_table_name_1, cur), - canonicalized_table_name(test_schema_name, test_table_name_2, cur))) - - self.rec_1 = { 'name' : 'fred', 'size' : 'big' } - insert_record(cur, test_table_name_1, self.rec_1) - - cur.execute("SELECT id FROM {}".format(canonicalized_table_name(test_schema_name, test_table_name_1, cur))) - fk_id = cur.fetchone()[0] - - self.rec_2 = { 'fk_id' : fk_id, 'age' : 99 } - insert_record(cur, test_table_name_2, self.rec_2) - - @staticmethod - def expected_check_streams(): - return { 'postgres-public-chicken_view'} - - @staticmethod - def expected_sync_streams(): - return { 'chicken_view' } - - @staticmethod - def name(): - return "tap_tester_postgres_views_logical_replication" - - @staticmethod - def expected_pks(): - return { - 'chicken_view' : {'id'} - } - - @staticmethod - def tap_name(): - return "tap-postgres" - - @staticmethod - def get_type(): - return "platform.postgres" - - @staticmethod - def get_credentials(): - return {'password': os.getenv('TAP_POSTGRES_PASSWORD')} - - @staticmethod 
- def get_properties(): - return {'host' : os.getenv('TAP_POSTGRES_HOST'), - 'dbname' : os.getenv('TAP_POSTGRES_DBNAME'), - 'port' : os.getenv('TAP_POSTGRES_PORT'), - 'user' : os.getenv('TAP_POSTGRES_USER'), - 'default_replication_method' : 'FULL_TABLE' - } - - def test_run(self): - conn_id = connections.ensure_connection(self) - - # run in check mode - check_job_name = runner.run_check_mode(self, conn_id) - - # verify check exit codes - exit_status = menagerie.get_exit_status(conn_id, check_job_name) - menagerie.verify_check_exit_status(self, exit_status, check_job_name) - - # verify the tap discovered the right streams - found_catalogs = [fc for fc - in menagerie.get_catalogs(conn_id) - if fc['tap_stream_id'] in self.expected_check_streams()] - - self.assertEqual(len(found_catalogs), - 1, - msg="unable to locate schemas for connection {}".format(conn_id)) - - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) - diff = self.expected_check_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - - # verify that persisted streams have the correct properties - chicken_catalog = found_catalogs[0] - - self.assertEqual('chicken_view', chicken_catalog['stream_name']) - print("discovered streams are correct") - - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') - md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] - - self.assertEqual( - {(): {'database-name': 'postgres', 'is-view': True, 'row-count': 0, 'schema-name': 'public', 'table-key-properties': []}, - ('properties', 'fk_id'): {'inclusion': 'available', 'sql-datatype': 'bigint', 'selected-by-default': True}, - ('properties', 'name'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'age'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}, - ('properties', 'size'): {'inclusion': 'available', 'sql-datatype': 'character varying', 'selected-by-default': True}, - ('properties', 'id'): {'inclusion': 'available', 'sql-datatype': 'integer', 'selected-by-default': True}}, - metadata.to_map(md)) - - - # 'ID' selected as view-key-properties - replication_md = [{"breadcrumb": [], "metadata": {'replication-key': None, "replication-method" : "LOG_BASED", 'view-key-properties': ["id"]}}] - - connections.select_catalog_and_fields_via_metadata(conn_id, chicken_catalog, - menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id']), - replication_md) - - # clear state - menagerie.set_state(conn_id, {}) - - sync_job_name = runner.run_sync_mode(self, conn_id) - - # verify tap and target exit codes - exit_status = menagerie.get_exit_status(conn_id, sync_job_name) - - self.assertEqual(exit_status['tap_exit_status'], 1) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - - record_count_by_stream = runner.examine_target_output_file(self, - conn_id, - self.expected_sync_streams(), - self.expected_pks()) - - self.assertEqual(record_count_by_stream, {}) - print("records are correct") - - # verify state and bookmarks - state = menagerie.get_state(conn_id) - self.assertEqual(state, {}, msg="expected state to be empty") - - - - -SCENARIOS.add(PostgresViewsLogicalReplication) From 1dcd2bd9dace35375abd47f16bb57c16d634cab6 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Wed, 7 Apr 2021 13:25:09 +0000 Subject: [PATCH 20/26] test datatype against logical replication --- 
tests/test_postgres_datatypes.py | 37 +++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/tests/test_postgres_datatypes.py b/tests/test_postgres_datatypes.py index 6f045ba..edb2f4e 100644 --- a/tests/test_postgres_datatypes.py +++ b/tests/test_postgres_datatypes.py @@ -146,10 +146,10 @@ class PostgresDatatypes(unittest.TestCase): def tearDown(self): pass - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) + with db_utils.get_test_connection(test_db) as conn: + conn.autocommit = True + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: + cur.execute(""" SELECT pg_drop_replication_slot('stitch') """) def setUp(self): db_utils.ensure_environment_variables_set() @@ -161,7 +161,7 @@ def setUp(self): conn.autocommit = True with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - # db_utils.ensure_replication_slot(cur, test_db) + db_utils.ensure_replication_slot(cur, test_db) canonicalized_table_name = db_utils.canonicalized_table_name(cur, test_schema_name, test_table_name) @@ -1155,9 +1155,19 @@ def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = def test_run(self): """Parametrized datatypes test running against each replication method.""" - self.default_replication_method = self.FULL_TABLE - full_table_conn_id = connections.ensure_connection(self, original_properties=False) - self.datatypes_test(full_table_conn_id) + # TODO paramterize using subtest + for replication_method in {self.FULL_TABLE, self.LOG_BASED}: # self.INCREMENTAL}: + with self.subTest(replication_method=replication_method): + + # set default replication + self.default_replication_method = replication_method + + # grab a new connection + conn_id = connections.ensure_connection(self, original_properties=False) + + # run the test against the new connection + self.datatypes_test(conn_id) + # TODO Parametrize tests to also run against multiple local (db) timezones # with db_utils.get_test_connection(test_db) as conn: @@ -1167,6 +1177,11 @@ def test_run(self): # db_utils.set_db_time_zone('America/New_York') + # self.default_replication_method = self.FULL_TABLE + # full_table_conn_id = connections.ensure_connection(self, original_properties=False) + # self.datatypes_test(full_table_conn_id) + + # self.default_replication_method = self.INCREMENTAL # incremental_conn_id = connections.ensure_connection(self, original_properties=False) # self.datatypes_test(incremental_conn_id) @@ -1228,13 +1243,15 @@ def datatypes_test(self, conn_id): # verify the number of records and number of messages match our expectations expected_record_count = len(self.expected_records) - expected_message_count = expected_record_count + 2 # activate versions + expected_activate_version_count = 1 if self.default_replication_method is self.INCREMENTAL else 2 + expected_message_count = expected_record_count + expected_activate_version_count self.assertEqual(expected_record_count, record_count_by_stream[test_table_name]) self.assertEqual(expected_message_count, len(messages)) # verify we start and end syncs with an activate version message self.assertEqual('activate_version', messages[0]['action']) - self.assertEqual('activate_version', messages[-1]['action']) + if self.default_replication_method in {self.FULL_TABLE, self.LOG_BASED}: + self.assertEqual('activate_version', messages[-1]['action']) # 
verify the remaining messages are upserts actions = {message['action'] for message in messages if message['action'] != 'activate_version'} From 5d50be3c097072b4f3c238ff04daddef5a1861c6 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Wed, 7 Apr 2021 13:53:45 +0000 Subject: [PATCH 21/26] test datatype against incremental replication --- tests/test_postgres_datatypes.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_postgres_datatypes.py b/tests/test_postgres_datatypes.py index edb2f4e..5bbb9dd 100644 --- a/tests/test_postgres_datatypes.py +++ b/tests/test_postgres_datatypes.py @@ -145,7 +145,6 @@ class PostgresDatatypes(unittest.TestCase): default_replication_method = "" def tearDown(self): - pass with db_utils.get_test_connection(test_db) as conn: conn.autocommit = True with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: @@ -1133,7 +1132,7 @@ def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = elif self.default_replication_method is self.INCREMENTAL: additional_md = [{ "breadcrumb": [], "metadata": { - "replication-method": self.INCREMENTAL, "replication-key": "OUR TS" + "replication-method": self.INCREMENTAL, "replication-key": "id" } }] @@ -1156,7 +1155,7 @@ def test_run(self): """Parametrized datatypes test running against each replication method.""" # TODO paramterize using subtest - for replication_method in {self.FULL_TABLE, self.LOG_BASED}: # self.INCREMENTAL}: + for replication_method in {self.FULL_TABLE, self.LOG_BASED, self.INCREMENTAL}: with self.subTest(replication_method=replication_method): # set default replication @@ -1168,6 +1167,8 @@ def test_run(self): # run the test against the new connection self.datatypes_test(conn_id) + print(f"{self.name()} passed using {replication_method} replication.") + # TODO Parametrize tests to also run against multiple local (db) timezones # with db_utils.get_test_connection(test_db) as conn: From 873c3fb333b85936d659bb9f6b03e3362d0c627d Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Wed, 7 Apr 2021 14:10:29 +0000 Subject: [PATCH 22/26] cleanup, test docstring --- tests/test_postgres_datatypes.py | 53 +++++++++++++++++--------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/tests/test_postgres_datatypes.py b/tests/test_postgres_datatypes.py index 5bbb9dd..894a358 100644 --- a/tests/test_postgres_datatypes.py +++ b/tests/test_postgres_datatypes.py @@ -1154,7 +1154,6 @@ def select_streams_and_fields(self, conn_id, catalog, select_all_fields: bool = def test_run(self): """Parametrized datatypes test running against each replication method.""" - # TODO paramterize using subtest for replication_method in {self.FULL_TABLE, self.LOG_BASED, self.INCREMENTAL}: with self.subTest(replication_method=replication_method): @@ -1170,35 +1169,39 @@ def test_run(self): print(f"{self.name()} passed using {replication_method} replication.") - # TODO Parametrize tests to also run against multiple local (db) timezones - # with db_utils.get_test_connection(test_db) as conn: - # conn.autocommit = True - # with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: - - # db_utils.set_db_time_zone('America/New_York') - - - # self.default_replication_method = self.FULL_TABLE - # full_table_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(full_table_conn_id) - - - # self.default_replication_method = self.INCREMENTAL - # incremental_conn_id = connections.ensure_connection(self, original_properties=False) - # 
self.datatypes_test(incremental_conn_id) - - # self.default_replication_method = self.LOG_BASED - # log_based_conn_id = connections.ensure_connection(self, original_properties=False) - # self.datatypes_test(log_based_conn_id) - - def datatypes_test(self, conn_id): """ Test Description: - Basic Datatypes Test for a database tap. + Testing boundary values for all postgres-supported datatypes. Negative testing + for tap-unsupported types. Partition testing for datetime precision. Testing edge + cases for text, numeric/decimal, and datetimes. Test Cases: - + - 'minimum_boundary_general' + - 'maximum_boundary_text' + - 'negative_infinity_floats + - 'positive_infinity_floats' + - 'not_a_number_floats' + - 'not_a_number_numeric' + - 'ipv6_cidr_inet' + - '0_digits_of_precision_datetimes' + - '1_digits_of_precision_datetimes' + - '2_digits_of_precision_datetimes' + - '3_digits_of_precision_datetimes' + - '4_digits_of_precision_datetimes' + - '5_digits_of_precision_datetimes' + - '6_digits_of_precision_datetimes' + - 'near_zero_negative_floats' + - 'near_zero_positive_floats' + - 'zero_floats' + - 'special_characters_hstore' + - 'null_for_all_fields_possible' + - 'out_of_bounds_precision_decimal_and_numeric' + - 'all_ascii_text' + - 'all_unicode_text' + - 'maximum_boundary_varchar' + - 'unsupported_types' + - 'maximum_boundary_general' """ # run discovery (check mode) From b3639d8842191bf2ae22e04d1acfcdfb4ddf0abc Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Wed, 7 Apr 2021 15:41:43 +0000 Subject: [PATCH 23/26] clarify negative test for logical views with 'invalid' in table name --- ...test_postgres_views_logical_replication.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/test_postgres_views_logical_replication.py b/tests/test_postgres_views_logical_replication.py index e705132..b389122 100644 --- a/tests/test_postgres_views_logical_replication.py +++ b/tests/test_postgres_views_logical_replication.py @@ -11,16 +11,16 @@ import db_utils # pylint: disable=import-error -expected_schemas = {'chicken_view': {'properties': - {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], - 'minimum': -9223372036854775808}, - 'size': {'type': ['null', 'string']}, - 'name': {'type': ['null', 'string']}, - 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}, - 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], - 'minimum': -2147483648}}, - 'type': 'object'}} +expected_schemas = {'invalid_chicken_view': {'properties': + {'fk_id': {'maximum': 9223372036854775807, 'type': ['null', 'integer'], + 'minimum': -9223372036854775808}, + 'size': {'type': ['null', 'string']}, + 'name': {'type': ['null', 'string']}, + 'id': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}, + 'age': {'maximum': 2147483647, 'type': ['null', 'integer'], + 'minimum': -2147483648}}, + 'type': 'object'}} def canonicalized_table_name(schema, table, cur): @@ -44,7 +44,7 @@ def insert_record(cursor, table_name, data): test_schema_name = "public" test_table_name_1 = "postgres_views_full_table_replication_test" test_table_name_2 = "postgres_views_full_table_replication_test_2" -test_view = 'chicken_view' +test_view = 'invalid_chicken_view' class PostgresViewsLogicalReplication(unittest.TestCase): def setUp(self): @@ -101,11 +101,11 @@ def setUp(self): @staticmethod def expected_check_streams(): - return { 'postgres-public-chicken_view'} + return { 'postgres-public-invalid_chicken_view'} @staticmethod def 
expected_sync_streams(): - return { 'chicken_view' } + return { 'invalid_chicken_view' } @staticmethod def name(): @@ -114,7 +114,7 @@ def name(): @staticmethod def expected_pks(): return { - 'chicken_view' : {'id'} + 'invalid_chicken_view' : {'id'} } @staticmethod @@ -164,10 +164,10 @@ def test_run(self): # verify that persisted streams have the correct properties chicken_catalog = found_catalogs[0] - self.assertEqual('chicken_view', chicken_catalog['stream_name']) + self.assertEqual('invalid_chicken_view', chicken_catalog['stream_name']) print("discovered streams are correct") - print('checking discoverd metadata for ROOT-CHICKEN_VIEW') + print('checking discoverd metadata for ROOT-INVALID_CHICKEN_VIEW') md = menagerie.get_annotated_schema(conn_id, chicken_catalog['stream_id'])['metadata'] self.assertEqual( From b8ac0cb5a29196dfb9a0aba9fd6d7c7e9e617c46 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Fri, 9 Apr 2021 19:15:21 +0000 Subject: [PATCH 24/26] update comments --- tests/test_postgres_datatypes.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tests/test_postgres_datatypes.py b/tests/test_postgres_datatypes.py index 894a358..ea28a1a 100644 --- a/tests/test_postgres_datatypes.py +++ b/tests/test_postgres_datatypes.py @@ -230,18 +230,19 @@ def setUp(self): self.expected_records = dict() - # TODO | BUG_0 | The target blows up with greater than 38 digits before/after the decimal. - # Is this a known/expected behavior or a BUG in the target? - # It prevents us from testing what the tap claims to be able to support - # (100 precision, 38 scale) without rounding AND..The postgres limits WITH rounding. + # BUG_5 | see ticket below + # The target blows up with greater than 38 digits before/after the decimal. + # Is this a known/expected behavior or a BUG in the target? + # It prevents us from testing what the tap claims to be able to support + # (100 precision, 38 scale) without rounding AND..The postgres limits WITH rounding. # insert a record wtih minimum values test_case = 'minimum_boundary_general' min_date = datetime.date(1, 1, 1) my_absurdly_small_decimal = decimal.Decimal('-' + '9' * 38 + '.' + '9' * 38) # CURRENT LIMIT IN TARGET - # my_absurdly_small_decimal = decimal.Decimal('-' + '9' * 62 + '.' + '9' * 37) # 131072 + 16383 BUG_0 - # my_absurdly_small_spec_decimal = decimal.Decimal('-' + '9'*500 + '.' + '9'*500) # BUG_0 + # my_absurdly_small_decimal = decimal.Decimal('-' + '9' * 62 + '.' + '9' * 37) # 131072 + 16383 BUG_5 + # my_absurdly_small_spec_decimal = decimal.Decimal('-' + '9'*500 + '.' + '9'*500) # BUG_5 self.inserted_records.append({ 'id': 1, 'our_char': "a", @@ -449,6 +450,9 @@ def setUp(self): db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + # TODO UPDATE BUG_2 to reflect BUG_2 in HP ! + + # TODO | BUG_2 | We do not preserve datetime precision. # If a record has a decimal value it is padded to 6 digits of precision. # This is not the expected behavior. @@ -758,10 +762,11 @@ def setUp(self): db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - # TODO BUG_5 | The target prevents us from sending a record with numeric/decimal - # values that are out of the max precision of 6 decimal digits. - # The expectation is that values with higher precision than the allowed - # limit, would be rounded and handled. 
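# The BUG_5 notes above pin the current working limit at 38 digits on each side of the
# decimal point. A short sketch of the boundary fixture value used above; Python's
# Decimal string constructor is exact, so no precision is lost at construction time
# (illustration only):
import decimal

boundary = decimal.Decimal('-' + '9' * 38 + '.' + '9' * 38)

sign, digits, exponent = boundary.as_tuple()
assert sign == 1           # negative value
assert len(digits) == 76   # 38 integer digits plus 38 fractional digits
assert exponent == -38     # 38 digits after the decimal point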
+ # BUG_5 | https://stitchdata.atlassian.net/browse/SRCE-5226 + # The target prevents us from sending a record with numeric/decimal + # values that are out of the max precision of 6 decimal digits. + # The expectation is that values with higher precision than the allowed + # limit, would be rounded and handled. # add a record with out-of-bounds precision for DECIMAL/NUMERIC From 9067ab3bcad66d5a2fa374525237351534f1cb27 Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Mon, 12 Apr 2021 15:22:30 +0000 Subject: [PATCH 25/26] bug 2 rewritten, addressed PR feedback --- tests/test_postgres_datatypes.py | 97 +++++++++++++++++--------------- 1 file changed, 51 insertions(+), 46 deletions(-) diff --git a/tests/test_postgres_datatypes.py b/tests/test_postgres_datatypes.py index ea28a1a..c329112 100644 --- a/tests/test_postgres_datatypes.py +++ b/tests/test_postgres_datatypes.py @@ -450,12 +450,7 @@ def setUp(self): db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - # TODO UPDATE BUG_2 to reflect BUG_2 in HP ! - - - # TODO | BUG_2 | We do not preserve datetime precision. - # If a record has a decimal value it is padded to 6 digits of precision. - # This is not the expected behavior. + # BUG_2 | We are attaching utc info onto OUR TS which is supposed to be a naive timestamp # add a record with datetimes having .1 second precision @@ -474,10 +469,10 @@ def setUp(self): self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) self.expected_records[test_case].update({ - 'OUR TS': '1996-12-23T19:05:00.100000+00:00', # '1996-12-23T19:05:00.1+00:00', # BUG_2 - 'OUR TS TZ': '1996-12-23T19:05:00.100000+00:00', # '1996-12-23T19:05:00.1+00:00', # BUG_2 - 'OUR TIME': '19:05:00.100000', # '19:05:00.1', # BUG_2 - 'OUR TIME TZ': '19:05:00.100000+00:00', # '19:05:00.1+00:00', # BUG_2 + 'OUR TS': '1996-12-23T19:05:00.100000+00:00', # '1996-12-23T19:05:00.100000', # BUG_2 + 'OUR TS TZ': '1996-12-23T19:05:00.100000+00:00', + 'OUR TIME': '19:05:00.100000', + 'OUR TIME TZ': '19:05:00.100000+00:00', }) my_keys = set(self.expected_records[test_case].keys()) for key in my_keys: @@ -502,10 +497,10 @@ def setUp(self): self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) self.expected_records[test_case].update({ - 'OUR TS': '1996-12-23T19:05:00.120000+00:00', # '1996-12-23T19:05:00.12+00:00', # BUG_2 - 'OUR TS TZ': '1996-12-23T19:05:00.120000+00:00', # '1996-12-23T19:05:00.12+00:00', # BUG_2 - 'OUR TIME': '19:05:00.120000', # '19:05:00.12', # BUG_2 - 'OUR TIME TZ': '19:05:00.120000+00:00', # '19:05:00.12+00:00', # BUG_2 + 'OUR TS': '1996-12-23T19:05:00.120000+00:00', # '1996-12-23T19:05:00.120000', # BUG_2 + 'OUR TS TZ': '1996-12-23T19:05:00.120000+00:00', + 'OUR TIME': '19:05:00.120000', + 'OUR TIME TZ': '19:05:00.120000+00:00', }) my_keys = set(self.expected_records[test_case].keys()) for key in my_keys: @@ -530,10 +525,10 @@ def setUp(self): self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) self.expected_records[test_case].update({ - 'OUR TS': '1996-12-23T19:05:00.123000+00:00', # '1996-12-23T19:05:00.123+00:00', # BUG_2 - 'OUR TS TZ': '1996-12-23T19:05:00.123000+00:00', # '1996-12-23T19:05:00.123+00:00', # BUG_2 - 'OUR TIME': '19:05:00.123000', 
# '19:05:00.123', # BUG_2 - 'OUR TIME TZ': '19:05:00.123000+00:00', # '19:05:00.123+00:00', # BUG_2 + 'OUR TS': '1996-12-23T19:05:00.123000+00:00', # '1996-12-23T19:05:00.123000', # BUG_2 + 'OUR TS TZ': '1996-12-23T19:05:00.123000+00:00', + 'OUR TIME': '19:05:00.123000', + 'OUR TIME TZ': '19:05:00.123000+00:00', }) my_keys = set(self.expected_records[test_case].keys()) for key in my_keys: @@ -558,10 +553,10 @@ def setUp(self): self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) self.expected_records[test_case].update({ - 'OUR TS': '1996-12-23T19:05:00.123400+00:00', # '1996-12-23T19:05:00.1234+00:00', # BUG_2 - 'OUR TS TZ': '1996-12-23T19:05:00.123400+00:00', # '1996-12-23T19:05:00.1234+00:00', # BUG_2 - 'OUR TIME': '19:05:00.123400', # '19:05:00.1234', # BUG_2 - 'OUR TIME TZ': '19:05:00.123400+00:00', # '19:05:00.1234+00:00', # BUG_2 + 'OUR TS': '1996-12-23T19:05:00.123400+00:00', # '1996-12-23T19:05:00.123400', # BUG_2 + 'OUR TS TZ': '1996-12-23T19:05:00.123400+00:00', + 'OUR TIME': '19:05:00.123400', + 'OUR TIME TZ': '19:05:00.123400+00:00', }) my_keys = set(self.expected_records[test_case].keys()) for key in my_keys: @@ -586,10 +581,10 @@ def setUp(self): self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) self.expected_records[test_case].update({ - 'OUR TS': '1996-12-23T19:05:00.123450+00:00', # '1996-12-23T19:05:00.12345+00:00', # BUG_2 - 'OUR TS TZ': '1996-12-23T19:05:00.123450+00:00', # '1996-12-23T19:05:00.12345+00:00', # BUG_2 - 'OUR TIME': '19:05:00.123450', # '19:05:00.12345', # BUG_2 - 'OUR TIME TZ': '19:05:00.123450+00:00', # '19:05:00.12345+00:00', # BUG_2 + 'OUR TS': '1996-12-23T19:05:00.123450+00:00', # '1996-12-23T19:05:00.123450', # BUG_2 + 'OUR TS TZ': '1996-12-23T19:05:00.123450+00:00', + 'OUR TIME': '19:05:00.123450', + 'OUR TIME TZ': '19:05:00.123450+00:00', }) my_keys = set(self.expected_records[test_case].keys()) for key in my_keys: @@ -614,7 +609,7 @@ def setUp(self): self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) self.expected_records[test_case].update({ - 'OUR TS': '1996-12-23T19:05:00.123456+00:00', + 'OUR TS': '1996-12-23T19:05:00.123456+00:00', # '1996-12-23T19:05:00.123456', # BUG_2 'OUR TS TZ': '1996-12-23T19:05:00.123456+00:00', 'OUR TIME': '19:05:00.123456', 'OUR TIME TZ': '19:05:00.123456+00:00', @@ -626,6 +621,34 @@ def setUp(self): db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + # add a record with datetimes having .000000001 second (nanosecond) precision + test_case = '9_digits_of_precision_datetimes' + our_serial += 1 + self.inserted_records.append({ + 'id': our_serial, + 'our_bigserial': our_serial, + 'our_serial': our_serial, + 'our_smallserial': our_serial, + quote_ident('OUR TS', cur): '1996-12-23T19:05:00.123456789', + quote_ident('OUR TS TZ', cur): '1996-12-23T19:05:00.123456789+00:00', + quote_ident('OUR TIME', cur): '19:05:00.123456789', + quote_ident('OUR TIME TZ', cur): '19:05:00.123456789+00:00', + }) + self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) + self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) + self.expected_records[test_case].update({ + 'OUR TS': 
'1996-12-23T19:05:00.123457+00:00', # '1996-12-23T19:05:00.123456', # BUG_2 + 'OUR TS TZ': '1996-12-23T19:05:00.123457+00:00', + 'OUR TIME': '19:05:00.123457', + 'OUR TIME TZ': '19:05:00.123457+00:00', + }) + my_keys = set(self.expected_records[test_case].keys()) + for key in my_keys: + if key.startswith('"'): + del self.expected_records[test_case][key] + db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) + + # TODO | BUG_3 | floating-point precisions can't handle expected # negative value nearest zero boundary @@ -794,24 +817,6 @@ def setUp(self): # db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - # add a record with all extended ascii characters - test_case = 'all_ascii_text' - our_serial += 1 - our_ascii = ''.join(chr(x) for x in range(128) if chr(x) != '\x00') - our_extended_ascii = ''.join(chr(x) for x in range(256) if chr(x) != '\x00') - self.inserted_records.append({ - 'id': our_serial, - 'our_bigserial': our_serial, - 'our_serial': our_serial, - 'our_smallserial': our_serial, - 'our_text': our_ascii, - 'our_text_2': our_extended_ascii, - }) - self.expected_records[test_case] = copy.deepcopy(self.inserted_records[-1]) - self.expected_records[test_case].update(self.null_out_remaining_fields(self.inserted_records[-1])) - db_utils.insert_record(cur, test_table_name, self.inserted_records[-1]) - - # add a record with all unicode characters test_case = 'all_unicode_text' our_serial += 1 @@ -1196,13 +1201,13 @@ def datatypes_test(self, conn_id): - '4_digits_of_precision_datetimes' - '5_digits_of_precision_datetimes' - '6_digits_of_precision_datetimes' + - '9_digits_of_precision_datetimes' - 'near_zero_negative_floats' - 'near_zero_positive_floats' - 'zero_floats' - 'special_characters_hstore' - 'null_for_all_fields_possible' - 'out_of_bounds_precision_decimal_and_numeric' - - 'all_ascii_text' - 'all_unicode_text' - 'maximum_boundary_varchar' - 'unsupported_types' From 852470d5e94b413204654639e833a34d9254d80a Mon Sep 17 00:00:00 2001 From: Kyle Speer Date: Mon, 12 Apr 2021 17:27:08 +0000 Subject: [PATCH 26/26] mark bug for missing datatypes --- tests/test_postgres_datatypes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_postgres_datatypes.py b/tests/test_postgres_datatypes.py index c329112..b7ac611 100644 --- a/tests/test_postgres_datatypes.py +++ b/tests/test_postgres_datatypes.py @@ -220,6 +220,7 @@ def setUp(self): unsupported_txid_snapshot TXID_SNAPSHOT, unsupported_xml XML) """.format(canonicalized_table_name) + # BUG_7 | We are not testing COMPOSITE TYPE, RANGE TYPE, or OID. These should be marked as unsupported but are not. cur = db_utils.ensure_fresh_table(conn, cur, test_schema_name, test_table_name) cur.execute(create_table_sql)
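Note on the datetime-precision expectations above: Postgres timestamp/timestamptz/time columns keep at most microsecond (6-digit) resolution, so the 9-digit '…123456789' input is expected back rounded to '…123457', and shorter fractions such as '.1' come back zero-padded to '.100000' by the tap/target. The snippet below is only an illustrative sketch, not part of the test suite; the helper name and the half-up rounding mode are assumptions made for the example.

# Illustrative sketch only (not in tests/): derive the fractional-second value
# Postgres is expected to hand back for a sub-microsecond timestamp literal.
from decimal import Decimal, ROUND_HALF_UP

def expected_pg_fraction(fraction_digits: str) -> str:
    """Round a fractional-seconds string to Postgres' 6-digit (microsecond)
    resolution and zero-pad it, e.g. '123456789' -> '123457', '1' -> '100000'."""
    rounded = Decimal('0.' + fraction_digits).quantize(
        Decimal('0.000001'), rounding=ROUND_HALF_UP)  # rounding mode assumed
    return f'{rounded:.6f}'[2:]

assert expected_pg_fraction('123456789') == '123457'  # 9_digits_of_precision_datetimes
assert expected_pg_fraction('1') == '100000'          # 1_digits_of_precision_datetimes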